def test03(self):
    """Testing copy() with no shuffle.

    Builds a barray with default parameters, copies it with
    ``shuffle=False`` and checks that the copy reports more compressed
    bytes than the original.
    """
    # np.linspace needs an integer sample count; a float such as 1e4 is
    # rejected by current NumPy releases.
    a = np.linspace(-1., 1., int(1e4))
    b = blz.barray(a, rootdir=self.rootdir)
    c = b.copy(bparams=blz.bparams(shuffle=False))
    # print("b.cbytes, c.cbytes:", b.cbytes, c.cbytes)
    # assertTrue is the supported spelling of the deprecated assert_ alias.
    self.assertTrue(b.cbytes < c.cbytes, "shuffle not changed")
def test02(self):
    """Testing copy() with lesser compression.

    Builds a barray with default parameters, copies it with ``clevel=1``
    and checks that the copy reports more compressed bytes than the
    original.
    """
    # The sample count must be an int: NumPy no longer accepts 1e4 here.
    a = np.linspace(-1., 1., int(1e4))
    b = blz.barray(a, rootdir=self.rootdir)
    c = b.copy(bparams=blz.bparams(clevel=1))
    # print("b.cbytes, c.cbytes:", b.cbytes, c.cbytes)
    # assertTrue is the supported spelling of the deprecated assert_ alias.
    self.assertTrue(b.cbytes < c.cbytes, "clevel not changed")
def compute_barray(sexpr, clevel, kernel):
    """Run `sexpr` once through blz.eval() and print how long it took.

    `clevel` is forwarded to blz.bparams() for the output and `kernel` is
    passed to blz.eval() as its `vm` argument.
    """
    # Uncomment the next for disabling threading
    #blz.set_nthreads(1)
    #blz.blosc_set_nthreads(1)
    banner = "*** barray (using compression clevel = %d):" % clevel
    print(banner)
    # NOTE(review): presumably blz.eval() resolves the operand names used in
    # `sexpr` from this frame, which is why `x` is rebound here -- confirm.
    x = cx  # comment this for using numpy arrays in inputs
    started = time()
    result = blz.eval(sexpr, vm=kernel, bparams=blz.bparams(clevel))
    elapsed = time() - started
    print("Time for blz.eval (%s) --> %.3f" % (kernel, elapsed))
def compute_blz(sexpr, clevel, kernel):
    """Evaluate `sexpr` with blz.eval() and print timing and compression ratio.

    `clevel` is forwarded to blz.bparams() for the output and `kernel` is
    passed to blz.eval() as its `vm` argument.  When `clevel` is positive
    the names x, y, z are rebound to the module-level cx, cy, cz.
    """
    # Uncomment the next for disabling threading
    # Maybe due to some contention between Numexpr and Blosc?
    # blz.set_nthreads(blz.ncores//2)
    print("*** blz (using compression clevel = %d):" % clevel)
    if clevel > 0:
        # NOTE(review): presumably blz.eval() picks these operands up from
        # the calling frame by name -- confirm.
        x, y, z = cx, cy, cz
    t0 = time()
    cout = blz.eval(sexpr, vm=kernel, bparams=blz.bparams(clevel))
    elapsed = time() - t0
    # One print() call that is valid on Python 2 and 3; the text matches what
    # the former pair of print statements (joined by a trailing comma) wrote.
    print("Time for blz.eval (%s) --> %.3f , cratio (out): %.1f"
          % (kernel, elapsed, cout.nbytes / float(cout.cbytes)))
def test_btable(clevel):
    """Create a btable from generated rows at `clevel`, query it, return the hits.

    enter(), after_create() and after_query() are called around the two
    phases, in that order.
    """
    enter()
    nrows = int(NR)
    row_source = (mv+np.random.rand(NC)-mv for _ in xrange(nrows))
    tc = blz.fromiter(row_source, dtype=dt, bparams=blz.bparams(clevel),
                      count=nrows)
    after_create()
    matches = tc.where(squery, 'f1,f3')
    out = np.fromiter((row for row in matches), dtype="f8,f8")
    after_query()
    return out
def test_btable(clevel):
    """Create an lz4-compressed btable at `clevel`, query it, return the hits.

    enter(), after_create() and after_query() are called around the two
    phases, in that order.
    """
    enter()
    total_rows = int(NR)
    generated = (mv + np.random.rand(NC) - mv for _ in xrange(total_rows))
    params = blz.bparams(clevel, cname='lz4')
    tc = blz.fromiter(generated, dtype=dt, bparams=params, count=total_rows)
    after_create()
    selected = tc.where(squery, 'f1,f3')
    out = np.fromiter((row for row in selected), dtype="f8,f8")
    after_query()
    return out
def test01a(self):
    """Testing `__setitem()__` method with start,stop (scalar)"""
    shape = (500,200)
    rows = slice(100,400)
    expected = np.ones(shape, dtype="i4")*3
    actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir,
                      bparams=blz.bparams())
    # Zero the same band of rows in the NumPy reference and the BLZ array.
    expected[rows,:] = 0
    actual[rows] = 0
    if self.open:
        # Flush and reopen from rootdir so the comparison reads stored data.
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    # print("actual[rows]->", repr(actual[rows]))
    assert_array_equal(expected[rows], actual[rows], "Arrays are not equal")
def compute_blz(sexpr, clevel, kernel):
    """Evaluate `sexpr` with blz.eval() and print timing and compression ratio.

    `clevel` is forwarded to blz.bparams() for the output and `kernel` is
    passed to blz.eval() as its `vm` argument.  When `clevel` is positive
    the names x, y, z are rebound to the module-level cx, cy, cz.
    """
    # Uncomment the next for disabling threading
    # Maybe due to some contention between Numexpr and Blosc?
    # blz.set_nthreads(blz.ncores//2)
    print("*** blz (using compression clevel = %d):" % clevel)
    if clevel > 0:
        # NOTE(review): presumably blz.eval() looks these names up in the
        # calling frame -- confirm.
        x, y, z = cx, cy, cz
    t0 = time()
    cout = blz.eval(sexpr, vm=kernel, bparams=blz.bparams(clevel))
    elapsed = time() - t0
    cratio = cout.nbytes / float(cout.cbytes)
    # A single print() call works on Python 2 and 3 and reproduces the text
    # the two comma-joined print statements used to emit.
    print("Time for blz.eval (%s) --> %.3f , cratio (out): %.1f"
          % (kernel, elapsed, cratio))
def test01a(self):
    """Testing `__setitem()__` method with start,stop (scalar)"""
    dims = (500, 200)
    band = slice(100, 400)
    reference = np.ones(dims, dtype="i4") * 3
    stored = blz.fill(dims, 3, dtype="i4", rootdir=self.rootdir,
                      bparams=blz.bparams())
    # Apply the same assignment to both containers.
    reference[band, :] = 0
    stored[band] = 0
    if self.open:
        # Flush, then reopen from rootdir before comparing.
        stored.flush()
        stored = blz.open(rootdir=self.rootdir)
    # print("stored[band]->", repr(stored[band]))
    assert_array_equal(reference[band], stored[band], "Arrays are not equal")
# Benchmark: time sum() on a NumPy boolean array against the same data held
# in a blz.barray, then check that both sums agree.
import numpy as np
import blz
from time import time

# NumPy size arguments must be integers; 1e8 is a float and is rejected by
# current NumPy releases.
N = int(1e8)
# a = np.arange(N, dtype='f8')
a = np.random.randint(0, 10, N).astype("bool")

t0 = time()
sa = a.sum()
print("Time sum() numpy --> %.3f" % (time() - t0))

t0 = time()
ac = blz.barray(a, bparams=blz.bparams(9))
print("Time barry conv --> %.3f" % (time() - t0))
# %r replaces the backtick repr syntax, which Python 3 removed.
print("ac--> %r" % (ac,))

t0 = time()
sac = ac.sum()
# sac = ac.sum(dtype=np.dtype('i8'))
print("Time sum() barray --> %.3f" % (time() - t0))

# t0 = time()
# sac = sum(i for i in ac)
# print("Time sum() carray (iter) --> %.3f" % (time()-t0))

print("sa, sac--> %s %s %s %s" % (sa, sac, type(sa), type(sac)))
assert sa == sac
#print("cout-->", repr(cout))

if __name__=="__main__":

    N = 1e8        # the number of elements in x
    clevel = 5     # the compression level
    # Only the last uncommented expression takes effect; the earlier ones are
    # kept as alternatives to switch between.
    sexpr = "(x+1)<0"
    sexpr = "(((.25*x + .75)*x - 1.5)*x - 2)<0"
    #sexpr = "(((.25*x + .75)*x - 1.5)*x - 2)"
    doprofile = True

    print("Creating inputs...")
    x = np.arange(N)
    #x = np.linspace(0,100,N)
    cx = blz.barray(x, bparams=blz.bparams(clevel))

    print("Evaluating '%s' with 10^%d points" % (sexpr, int(math.log10(N))))

    # Reference timing with plain numexpr.
    t0 = time()
    cout = ne.evaluate(sexpr)
    # print() call, consistent with the other prints in this block and valid
    # on both Python 2 and 3.
    print("Time for numexpr --> %.3f" % (time()-t0,))

    if doprofile:
        import pstats
        import cProfile as prof
        # Profile one compute_barray() run and write the data to eval.prof.
        prof.run('compute_barray(sexpr, clevel=clevel, kernel="numexpr")',
        #prof.run('compute_barray(sexpr, clevel=clevel, kernel="python")',
                 'eval.prof')
        stats = pstats.Stats('eval.prof')
        stats.strip_dirs()
print(c)
print(c.dshape)

# or as many dimensions as you like
print_section('going 3d', level=1)
d = blaze.array([ [ [1, 2], [3, 4] ], [ [5, 6], [7, 8] ] ])
print(d)
print(d.dshape)

# --------------------------------------------------------------------

print_section('building compressed in-memory arrays')

# A compressed array (backed by BLZ):
import blz
datadesc = blaze.BLZ_DDesc(mode='w', bparams=blz.bparams(clevel=5))
# Pass the descriptor to the constructor; otherwise `datadesc` is created
# but never used by the array built below.
arr = blaze.array([1,2,3], ddesc=datadesc)
print(arr)

# --------------------------------------------------------------------

print_section('Explicit types in construction')

# It is possible to force a type in a given array. This allows a
# broader selection of types on construction.
e = blaze.array([1, 2, 3], dshape='3 * float32')
print(e)

# Note that the dimensions in the datashape when creating from a
# collection can be omitted. If that's the case, the dimensions will
# be inferred. The following is thus equivalent:
def append(data, clevel, cname):
    """Build a barray from data[0] and append every later item of `data`.

    `clevel` and `cname` are forwarded to blz.bparams() for the new barray,
    which is returned once all items have been appended.
    """
    params = blz.bparams(clevel, cname=cname)
    first, remaining = data[0], data[1:]
    merged = blz.barray(first, bparams=params)
    for piece in remaining:
        merged.append(piece)
    return merged
def test_create_compress_iter(self):
    """Create a BLZ-backed array from a generator and check its contents."""
    # A compressed array (backed by BLZ)
    compression = blz.bparams(clevel=5)
    blz_desc = BLZ_DDesc(mode='w', bparams=compression)
    source = (i for i in range(10))
    arr = blaze.array(source, ddesc=blz_desc)
    self.assertTrue(isinstance(arr, blaze.Array))
    expected = list(range(10))
    self.assertEqual(list(arr), expected)
def test03(self):
    """Testing `__getitem()__` method with ranges and steps"""
    reference = np.arange(1e3)
    packed = chunk(reference, atom=reference.dtype, bparams=blz.bparams())
    # print("packed[1:8:3]->", repr(packed[1:8:3]))
    expected = reference[1:8:3]
    actual = packed[1:8:3]
    assert_array_equal(expected, actual, "Arrays are not equal")
# Benchmark for getitem

import numpy as np
import blz
from time import time

try:
    xrange
except NameError:  # Python 3 has no xrange; range is already lazy there
    xrange = range

# NumPy size arguments must be integers; a float such as 1e7 is rejected by
# current NumPy releases.
N = int(1e7)       # the number of elements in x
M = 100000         # the elements to get
clevel = 1         # the compression level

print("Creating inputs with %d elements..." % N)

bparams = blz.bparams(clevel)

#x = np.arange(N)
x = np.zeros(N, dtype="f8")
y = x.copy()
z = x.copy()
cx = blz.barray(x, bparams=bparams)
cy = cx.copy()
cz = cx.copy()
ct = blz.btable((cx, cy, cz), names=['x', 'y', 'z'])
t = ct[:]

print("Starting benchmark now for getting %d elements..." % M)

# Retrieve from a ndarray
t0 = time()
vals = [x[i] for i in xrange(0, M, 3)]
print("Time for array--> %.3f" % (time() - t0,))
print("vals--> %s" % len(vals))
# blz.set_nthreads(blz.ncores//2) print "*** blz (using compression clevel = %d):" % clevel if clevel > 0: x, y, z = cx, cy, cz t0 = time() cout = blz.eval(sexpr, vm=kernel, bparams=blz.bparams(clevel)) print "Time for blz.eval (%s) --> %.3f" % (kernel, time()-t0,), print ", cratio (out): %.1f" % (cout.nbytes / float(cout.cbytes)) #print "cout-->", repr(cout) if __name__=="__main__": print "Creating inputs..." bparams = blz.bparams(clevel) y = x.copy() z = x.copy() cx = blz.barray(x, bparams=bparams) cy = blz.barray(y, bparams=bparams) cz = blz.barray(z, bparams=bparams) for sexpr in sexprs: print "Evaluating '%s' with 10^%d points" % (sexpr, int(math.log10(N))) compute_ref(sexpr) for kernel in "python", "numexpr": compute_blz(sexpr, clevel=0, kernel=kernel) if doprofile: import pstats import cProfile as prof
# NOTE(review): xrange and it.izip below exist only on Python 2; they are
# left untouched because the definitions of `x` and `y` are outside this view.
z = xrange(2,N+2)

print("Starting benchmark now for creating arrays...")

# Create a ndarray
#x = (i for i in xrange(N))    # true iterable
t0 = time()
out = np.fromiter(x, dtype='f8', count=N)
print("Time for ndarray--> %.3f" % (time()-t0,))
print("out--> %s" % len(out))

#blz.set_num_threads(blz.ncores//2)

# Create a barray
#x = (i for i in xrange(N))    # true iterable
t0 = time()
cout = blz.fromiter(x, dtype='f8', count=N, bparams=blz.bparams(clevel))
print("Time for barray--> %.3f" % (time()-t0,))
print("cout--> %s" % len(cout))
#assert_array_equal(out, cout, "Arrays are not equal")

# Create a barray (with unknown size)
#x = (i for i in xrange(N))    # true iterable
t0 = time()
cout = blz.fromiter(x, dtype='f8', count=-1, bparams=blz.bparams(clevel))
print("Time for barray (count=-1)--> %.3f" % (time()-t0,))
print("cout--> %s" % len(cout))
#assert_array_equal(out, cout, "Arrays are not equal")

# Retrieve from a structured ndarray
gen = ((i,j,k) for i,j,k in it.izip(x,y,z))
t0 = time()
def test_create_compress(self):
    """Create a BLZ-backed array from a NumPy range and check its contents."""
    # A compressed array (backed by BLZ)
    compression = blz.bparams(clevel=5)
    blz_desc = BLZ_DDesc(mode='w', bparams=compression)
    values = np.arange(1, 4)
    arr = blaze.array(values, ddesc=blz_desc)
    self.assertTrue(isinstance(arr, blaze.Array))
    self.assertEqual(list(arr), [1, 2, 3])
# Benchmark: time sum() on a NumPy boolean array against the same data held
# in a blz.barray, then check that both sums agree.
import numpy as np
import blz
from time import time

# The element count is used as a NumPy size, so it must be an int; the float
# 1e8 is rejected by current NumPy releases.
N = int(1e8)
#a = np.arange(N, dtype='f8')
a = np.random.randint(0, 10, N).astype('bool')

t0 = time()
sa = a.sum()
print("Time sum() numpy --> %.3f" % (time() - t0))

t0 = time()
ac = blz.barray(a, bparams=blz.bparams(9))
print("Time barry conv --> %.3f" % (time() - t0))
# %r stands in for the backtick repr syntax that Python 3 dropped.
print("ac--> %r" % (ac,))

t0 = time()
sac = ac.sum()
#sac = ac.sum(dtype=np.dtype('i8'))
print("Time sum() barray --> %.3f" % (time() - t0))

# t0 = time()
# sac = sum(i for i in ac)
# print("Time sum() carray (iter) --> %.3f" % (time()-t0))

print("sa, sac--> %s %s %s %s" % (sa, sac, type(sa), type(sac)))
assert (sa == sac)
def test01(self):
    """Testing `__getitem()__` method with scalars.

    Wraps a NumPy range in a chunk and checks that index 1 returns the
    same value from both containers.
    """
    a = np.arange(1e3)
    b = chunk(a, atom=a.dtype, bparams=blz.bparams())
    # print("b[1]->", repr(b[1]))
    # assertTrue is the supported spelling of the deprecated assert_ alias.
    self.assertTrue(a[1] == b[1], "Values in key 1 are not equal")
# Benchmark: time creating a barray in memory and on disk, copying with
# NumPy, and reopening the on-disk barray.
import numpy as np
import blz
from time import time

N = 100 * 1000 * 1000
CLEVEL = 5

a = np.linspace(0, 1, N)

# In-memory barray.
t0 = time()
ac = blz.barray(a, bparams=blz.bparams(clevel=CLEVEL))
print("time creation (memory) -> %s" % round(time()-t0, 3))
print("data (memory): %r" % (ac,))

# On-disk barray written under rootdir 'myarray'; flush() is inside the
# timed region.
t0 = time()
b = blz.barray(a, bparams=blz.bparams(clevel=CLEVEL),
               rootdir='myarray', mode='w')
b.flush()
print("time creation (disk) -> %s" % round(time()-t0, 3))
#print("meta (disk):", b.read_meta())

# Plain NumPy copy, for comparison.
t0 = time()
an = np.array(a)
print("time creation (numpy) -> %s" % round(time()-t0, 3))

# Reopen the barray stored under 'myarray'.
t0 = time()
c = blz.barray(rootdir='myarray')
print("time open (disk) -> %s" % round(time()-t0, 3))
#print("meta (disk):", c.read_meta())
print("data (disk): %r" % (c,))
# NOTE(review): xrange and it.izip below exist only on Python 2; they are
# left untouched because the definitions of `x` and `y` are outside this view.
z = xrange(2, N + 2)

print("Starting benchmark now for creating arrays...")

# Create a ndarray
#x = (i for i in xrange(N))    # true iterable
t0 = time()
out = np.fromiter(x, dtype='f8', count=N)
print("Time for ndarray--> %.3f" % (time() - t0, ))
print("out--> %s" % len(out))

#blz.set_num_threads(blz.ncores//2)

# Create a barray
#x = (i for i in xrange(N))    # true iterable
t0 = time()
cout = blz.fromiter(x, dtype='f8', count=N, bparams=blz.bparams(clevel))
print("Time for barray--> %.3f" % (time() - t0, ))
print("cout--> %s" % len(cout))
#assert_array_equal(out, cout, "Arrays are not equal")

# Create a barray (with unknown size)
#x = (i for i in xrange(N))    # true iterable
t0 = time()
cout = blz.fromiter(x, dtype='f8', count=-1, bparams=blz.bparams(clevel))
print("Time for barray (count=-1)--> %.3f" % (time() - t0, ))
print("cout--> %s" % len(cout))
#assert_array_equal(out, cout, "Arrays are not equal")

# Retrieve from a structured ndarray
gen = ((i, j, k) for i, j, k in it.izip(x, y, z))
t0 = time()
def test_create_compress_ones(self):
    """Create a BLZ-backed array of ten int64 ones and check its contents."""
    # A compressed array (backed by BLZ)
    compression = blz.bparams(clevel=5)
    blz_desc = BLZ_DDesc(mode='w', bparams=compression)
    arr = blaze.ones('10 * int64', ddesc=blz_desc)
    self.assertTrue(isinstance(arr, blaze.Array))
    expected = [1]*10
    self.assertEqual(list(arr), expected)
def test_create_compress(self):
    """Create a BLZ-backed array from np.arange(1,4) and check its contents."""
    # A compressed array (backed by BLZ)
    params = blz.bparams(clevel=5)
    descriptor = BLZ_DDesc(mode='w', bparams=params)
    source = np.arange(1,4)
    result = blaze.array(source, ddesc=descriptor)
    self.assertTrue(isinstance(result, blaze.Array))
    self.assertEqual(list(result), [1, 2, 3])
def test_create_compress_zeros(self):
    """Create a BLZ-backed array of ten int64 zeros and check its contents."""
    # A compressed array (backed by BLZ)
    compression = blz.bparams(clevel=5)
    blz_desc = BLZ_DDesc(mode='w', bparams=compression)
    arr = blaze.zeros('10 * int64', ddesc=blz_desc)
    self.assertTrue(isinstance(arr, blaze.Array))
    # The contents are read through the array's descriptor with ddesc_as_py.
    contents = ddesc_as_py(arr.ddesc)
    self.assertEqual(contents, [0]*10)
def test_create_compress_ones(self):
    """Create a BLZ-backed array of ten int64 ones and check its contents."""
    # A compressed array (backed by BLZ)
    params = blz.bparams(clevel=5)
    descriptor = BLZ_DDesc(mode='w', bparams=params)
    result = blaze.ones('10 * int64', ddesc=descriptor)
    self.assertTrue(isinstance(result, blaze.Array))
    self.assertEqual(list(result), [1] * 10)
def test04(self):
    """Testing `__getitem()__` method with long ranges"""
    reference = np.arange(1e4)
    packed = chunk(reference, atom=reference.dtype, bparams=blz.bparams())
    # print("packed[1:8000]->", repr(packed[1:8000]))
    expected = reference[1:8000]
    actual = packed[1:8000]
    assert_array_equal(expected, actual, "Arrays are not equal")