class FromIterMemory_int64array(FromiterTemplate, TestCase):
    """``fromiter`` test configuration for a variable-length ``int64`` array.

    Only class-level settings and the source iterator are defined here; the
    test methods are presumably supplied by ``FromiterTemplate`` -- TODO
    confirm against that class.
    """

    # Datashape requested for the array.
    ds = dshape('x, int64')
    # Number of items produced by ``gen``.
    count = 1000
    # Array parameters: compression level 5, no ``storage`` argument given.
    p = params(clevel=5)

    def gen(self):
        """Return a generator over the integers ``0 .. self.count - 1``."""
        upper = self.count
        return (value for value in xrange(upper))
def run_test(args):
    """Evaluate ``(x + y).dot(a*z + b*w)`` with a blaze vm and with numpy.

    Parameters
    ----------
    args : container of str
        Option flags.  ``"python"`` selects the ``"python"`` vm (otherwise
        ``"numexpr"`` is used); ``'in_memory'`` rebuilds every operand with
        ``blaze.array(arr[:], params=blaze.params(clevel=0))`` before
        evaluating; ``'print_expr'`` prints the expression object.

    The four array operands are opened from the first four entries of
    ``_persistent_array_names``; ``a`` and ``b`` are the scalar ``2.0``.
    Results and timings are printed; nothing is returned.
    """
    T = Terminal
    x, y, z, w, a, b = [T(symbol) for symbol in ('x', 'y', 'z', 'w', 'a', 'b')]

    vm = "numexpr" if "python" not in args else "python"
    print("evaluating expression with '%s' vm..." % vm)
    expr = (x + y).dot(a * z + b * w)

    print('opening blaze arrays...')
    x_, y_, z_, w_ = [blaze.open(_persistent_array_names[k]) for k in range(4)]
    a_ = 2.0
    b_ = 2.0

    if 'in_memory' in args:
        print('getting an in-memory version of blaze arrays...')
        params = blaze.params(clevel=0)
        t0 = time()
        x_, y_, z_, w_ = [blaze.array(arr[:], params=params)
                          for arr in (x_, y_, z_, w_)]
        print("conversion to blaze in-memory: %.3f" % (time() - t0))

    print("%s %s" % ('datashape is:', x_.datashape))

    if 'print_expr' in args:
        print(expr)

    # Building the variable mapping is part of the timed section.
    started = time()
    expr_vars = {
        'x': x_,
        'y': y_,
        'z': z_,
        'w': w_,
        'a': a_,
        'b': b_,
    }
    result_ce = expr.eval(expr_vars, params={'vm': vm})
    t_ce = time() - started
    print("'%s' vm result is : %s in %.3f s" % (vm, result_ce, t_ce))

    # in numpy...
    print('evaluating expression with numpy...')
    # Slice every operand with [:] before starting the numpy timer.
    x_, y_, z_, w_ = x_[:], y_[:], z_[:], w_[:]

    started = time()
    result_np = np.dot(x_ + y_, a_ * z_ + b_ * w_)
    t_np = time() - started

    print('numpy result is : %s in %.3f s' % (result_np, t_np))
def build_array(array_name, rows):
    """Return the array stored at ``array_name``, creating it when missing.

    Parameters
    ----------
    array_name : str
        Filesystem path used both for the existence check and as the
        ``storage`` of a newly created array.
    rows : int
        Number of values (``0.1 * i`` for ``i`` in ``0 .. rows - 1``) written
        when the array has to be created.

    Returns
    -------
    The freshly committed array, or the result of ``open(array_name)`` when
    the path already exists.
    """
    if os.path.exists(array_name):
        # NOTE(review): ``open`` is presumably the array-opening function
        # imported at file level rather than the builtin -- confirm.
        return open(array_name)

    storage_params = params(clevel=5, storage=array_name)
    values = (0.1*i for i in xrange(rows))
    created = fromiter(values, dshape='x, float', params=storage_params)
    created.commit()
    return created
def sliding_window_blz(dirname, window_size):
    """Append trailing-window means of a stored array to ``result.blz``.

    Parameters
    ----------
    dirname : str
        Location passed to ``blz.open`` to obtain the input array.
    window_size : int
        Maximum number of elements averaged for each output position.

    Returns
    -------
    The committed output array stored at ``'result.blz'``; any previous
    directory of that name is removed first.

    The number of output positions is the module-level ``NROWS``.
    """
    source = blz.open(dirname)

    out_path = 'result.blz'
    if os.path.exists(out_path):
        shutil.rmtree(out_path)
    filtered = blz.array([], dshape=source.datashape,
                         params=blz.params(storage=out_path))

    # Window start for every end index: 0 for the first ``window_size``
    # positions, then advancing by one per position.
    starts = [0] * window_size + list(range(1, NROWS - window_size + 1))

    for end in range(NROWS):
        window = source[starts[end]:end + 1]
        filtered.append([window.mean()])
    filtered.commit()

    return filtered
def test_simple_persistent_blob():
    """Store two strings in a persistent ``blob`` array and read them back.

    The array is created under a fresh temporary directory, which is removed
    afterwards whether or not the checks succeed.
    """
    td = tempfile.mkdtemp()
    try:
        tmppath = os.path.join(td, 'c')
        ds = blaze.dshape('x, blob')
        c = blaze.Array(["s1", "sss2"], ds,
                        params=blaze.params(storage=tmppath))

        assert c[0] == "s1"
        assert c[1] == "sss2"
    finally:
        # Remove everything under the temporary dir, even when the array
        # construction or an assertion above fails.
        shutil.rmtree(td)
def run_test(in_memory, args):
    """Time ``(x + y).dot(2.0*z + 2.0*w)`` with blir chunks and with numpy.

    Parameters
    ----------
    in_memory : bool
        When true, every opened array is first rebuilt with
        ``blaze.array(arr[:], params=blaze.params(clevel=9))``.
    args : container of str
        If it contains ``'print_expr'``, item ``[1]`` of ``expr.gen_blir()``
        is printed before the evaluation.

    The operands come from the first four entries of
    ``_persistent_array_names``.  Results and timings are printed; nothing
    is returned.
    """
    T = Terminal

    print('opening blaze arrays...')
    x, y, z, w = [blaze.open(_persistent_array_names[k]) for k in range(4)]
    shape, dtype = blaze.to_numpy(x.datashape)
    print("%s %s" % ("***nelements:", shape[0]))

    if in_memory:
        print('getting an in-memory version of blaze arrays...')
        params = blaze.params(clevel=9)
        t0 = time()
        x, y, z, w = [blaze.array(arr[:], params=params)
                      for arr in (x, y, z, w)]
        print("conversion to blaze in-memory: %.3f" % (time() - t0))

    print("%s %s" % ('datashape is:', x.datashape))

    print('evaluating expression with blir...')
    left = T(x) + T(y)
    right = T(2.0) * T(z) + T(2.0) * T(w)
    expr = left.dot(right)

    if 'print_expr' in args:
        print(expr.gen_blir()[1])

    started = time()
    result_ce = chunked_eval(expr, chunk_size=50000)
    t_ce = time() - started
    print('blir chunked result is : %s in %f s' % (result_ce, t_ce))
    print('***blir time: %.3f' % t_ce)

    # in numpy...
    t0 = time()
    x, y, z, w = x[:], y[:], z[:], w[:]
    print("conversion to numpy in-memory: %.3f" % (time() - t0))

    print('evaluating expression with numpy...')
    started = time()
    result_np = np.dot(x + y, 2.0 * z + 2.0 * w)
    t_np = time() - started

    print('numpy result is : %s in %f s' % (result_np, t_np))
    print('***numpy time: %.3f' % t_np)

    # Summary line: element count, blir time, numpy time.
    print('**** %d, %.5f, %.5f' % (shape[0], t_ce, t_np))
def run_test(in_memory, args):
    """Benchmark a dot-product expression with blir chunked eval and numpy.

    The expression is ``(x + y).dot(2.0*z + 2.0*w)`` where ``x, y, z, w`` are
    opened from the first four entries of ``_persistent_array_names``.

    Parameters
    ----------
    in_memory : bool
        If true, re-create each operand through
        ``blaze.array(arr[:], params=blaze.params(clevel=9))`` first.
    args : container of str
        ``"print_expr"`` in ``args`` prints ``expr.gen_blir()[1]``.

    Everything is reported through ``print``; the function returns ``None``.
    """
    T = Terminal

    print("opening blaze arrays...")
    operands = [blaze.open(_persistent_array_names[k]) for k in range(4)]
    x, y, z, w = operands
    shape, dtype = blaze.to_numpy(x.datashape)
    print("%s %s" % ("***nelements:", shape[0]))

    if in_memory:
        print("getting an in-memory version of blaze arrays...")
        params = blaze.params(clevel=9)
        t0 = time()
        x = blaze.array(x[:], params=params)
        y = blaze.array(y[:], params=params)
        z = blaze.array(z[:], params=params)
        w = blaze.array(w[:], params=params)
        elapsed = time() - t0
        print("conversion to blaze in-memory: %.3f" % elapsed)

    print("%s %s" % ("datashape is:", x.datashape))

    print("evaluating expression with blir...")
    expr = (T(x) + T(y)).dot(T(2.0) * T(z) + T(2.0) * T(w))

    if "print_expr" in args:
        blir_output = expr.gen_blir()
        print(blir_output[1])

    tic = time()
    result_ce = chunked_eval(expr, chunk_size=50000)
    t_ce = time() - tic
    print("blir chunked result is : %s in %f s" % (result_ce, t_ce))
    print("***blir time: %.3f" % t_ce)

    # in numpy...
    t0 = time()
    x = x[:]
    y = y[:]
    z = z[:]
    w = w[:]
    elapsed = time() - t0
    print("conversion to numpy in-memory: %.3f" % elapsed)

    print("evaluating expression with numpy...")
    tic = time()
    result_np = np.dot(x + y, 2.0 * z + 2.0 * w)
    t_np = time() - tic

    print("numpy result is : %s in %f s" % (result_np, t_np))
    print("***numpy time: %.3f" % t_np)

    # Summary line: element count, blir time, numpy time.
    print("**** %d, %.5f, %.5f" % (shape[0], t_ce, t_np))
def test_simple_persistence():
    """Create a persistent ``2, 2, float64`` array in a temporary directory.

    The test only checks that construction with a ``storage`` parameter does
    not raise.  The temporary directory is always removed, even when the
    constructor fails.
    """
    import os.path
    import shutil
    import tempfile
    from blaze import Array, dshape, params

    ds = dshape('2, 2, float64')

    td = tempfile.mkdtemp()
    try:
        tmppath = os.path.join(td, 'a')
        a = Array([1, 2, 3, 4], ds, params=params(storage=tmppath))
    finally:
        # Remove everything under the temporary dir
        shutil.rmtree(td)
def test_object_persistent_blob():
    """Round-trip ``(int, str)`` tuples through a persistent ``blob`` array.

    Ten tuples ``(i, str(i*.2))`` are stored under a fresh temporary
    directory and every item yielded by iterating the array is compared
    with the value that was written.  The directory is removed afterwards
    whether or not the checks succeed.
    """
    td = tempfile.mkdtemp()
    try:
        tmppath = os.path.join(td, 'c')
        ds = blaze.dshape('x, blob')
        c = blaze.Array([(i, str(i*.2)) for i in range(10)], ds,
                        params=blaze.params(storage=tmppath))

        for i, v in enumerate(c):
            assert v[0] == i
            assert v[1] == str(i*.2)
    finally:
        # Remove everything under the temporary dir, even on failure.
        shutil.rmtree(td)
def test_object_persistent_blob():
    """Check that tuples stored in a persistent ``blob`` array read back equal.

    The array holds ``(i, str(i * .2))`` for ``i`` in ``0 .. 9`` and lives
    under a temporary directory that is deleted in a ``finally`` clause, so
    a failing assertion no longer leaves the directory behind.
    """
    td = tempfile.mkdtemp()
    try:
        tmppath = os.path.join(td, 'c')
        ds = blaze.dshape('x, blob')
        items = [(i, str(i * .2)) for i in range(10)]
        c = blaze.Array(items, ds, params=blaze.params(storage=tmppath))

        for i, v in enumerate(c):
            assert v[0] == i
            assert v[1] == str(i * .2)
    finally:
        # Remove everything under the temporary dir
        shutil.rmtree(td)
def test_perserve():
    """Check that a persisted array reopens with the same datashape string.

    A ``3 x 4`` array of ones is written to ``p.blz`` (any existing
    directory of that name is removed first), reopened with ``blz.open``,
    and the string forms of both datashapes are compared.
    """
    path = "p.blz"
    if os.path.exists(path):
        shutil.rmtree(path)

    shape = (3, 4)
    arr = np.ones(shape)
    dshape = "%s,%s, float64" % shape

    barray = blz.Array(arr, dshape, params=blz.params(storage=path))
    print("%s %s" % ("barray:", repr(barray)))

    barray2 = blz.open(path)
    print("%s %s" % ("barray2:", repr(barray2)))

    written = str(barray.datashape)
    reopened = str(barray2.datashape)
    assert written == reopened
def test_perserve():
    """Persist an array of ones to ``p.blz`` and compare datashapes on reopen.

    Any pre-existing ``p.blz`` directory is deleted before writing.  The
    ``repr`` of both array objects is printed, then the string forms of the
    original and reopened datashapes must be equal.
    """
    shape = (3, 4)
    rows, cols = shape
    arr = np.ones(shape)
    dshape = "%s,%s, float64" % (rows, cols)

    path = "p.blz"
    already_there = os.path.exists(path)
    if already_there:
        shutil.rmtree(path)

    bparams = blz.params(storage=path)
    barray = blz.Array(arr, dshape, params=bparams)
    print("%s %s" % ("barray:", repr(barray)))

    barray2 = blz.open(path)
    print("%s %s" % ("barray2:", repr(barray2)))

    assert str(barray.datashape) == str(barray2.datashape)
def build_table(table_name, rows):
    """Return the example table, building it on disk first when missing.

    Parameters
    ----------
    table_name : str
        Path checked for existence and used as ``storage`` for a new table.
    rows : int
        Number of ``(i, random())`` records appended when building.

    Returns
    -------
    The newly committed table, or the result of ``open(table_name)`` when
    the path already exists.
    """
    if os.path.exists(table_name):
        # NOTE(review): ``open`` is presumably the table-opening function
        # imported at file level rather than the builtin -- confirm.
        return open(table_name)

    table = Table([],
                  dshape='x, {i: int64; f: float64}',
                  params=params(clevel=5, storage=table_name))
    for index in xrange(rows):
        record = (index, random())
        table.append(record)
    table.commit()
    return table
def run_test(args):
    """Time ``(x + y).dot(2.0*z + 2.0*w)`` with blir chunks and with numpy.

    Parameters
    ----------
    args : container of str
        ``'in_memory'`` rebuilds every operand with
        ``blaze.array(arr, params=blaze.params(clevel=9))`` first;
        ``'print_expr'`` prints item ``[1]`` of ``expr.gen_blir()``.

    The operands are opened from the first four entries of
    ``_persistent_array_names``.  Results and timings are printed; nothing
    is returned.
    """
    T = Terminal

    print('opening blaze arrays...')
    x, y, z, w = [blaze.open(_persistent_array_names[k]) for k in range(4)]

    if 'in_memory' in args:
        print('getting an in-memory version of blaze arrays...')
        params = blaze.params(clevel=9)
        t0 = time()
        x, y, z, w = [blaze.array(arr, params=params)
                      for arr in (x, y, z, w)]
        print("conversion to blaze in-memory: %.3f" % (time() - t0))

    print("%s %s" % ('datashape is:', x.datashape))

    print('evaluating expression with blir...')
    left = T(x) + T(y)
    right = T(2.0) * T(z) + T(2.0) * T(w)
    expr = left.dot(right)

    if 'print_expr' in args:
        print(expr.gen_blir()[1])

    started = time()
    result_ce = chunked_eval(expr, chunk_size=50000)
    t_ce = time() - started
    print('blir chunked result is : %s in %f s' % (result_ce, t_ce))

    # in numpy...
    t0 = time()
    x, y, z, w = x[:], y[:], z[:], w[:]
    print("Conversion to numpy in-memory: %.3f" % (time() - t0))

    print('evaluating expression with numpy...')
    started = time()
    result_np = np.dot(x + y, 2.0 * z + 2.0 * w)
    t_np = time() - started

    print('numpy result is : %s in %f s' % (result_np, t_np))
def test_getitem_nd_persistent():
    """Write an array to disk, reopen it and compare a full slice.

    The data returned by ``ndarr()`` is stored under a temporary directory
    with ``clevel=6``, reopened by path, and ``arr[:]`` must equal the
    original element-wise.  The directory is removed afterwards whether or
    not the comparison succeeds.
    """
    import os.path
    import shutil
    import tempfile

    td = tempfile.mkdtemp()
    try:
        path = os.path.join(td, 'test.blz')

        # write
        bparams = params(storage=path, clevel=6)
        nd = ndarr()
        barray = Array(nd, params=bparams)

        # read
        arr = open(path)
        data = arr[:]

        assert np.all(data == nd)
    finally:
        # Clean up even when writing, reading or the assertion fails.
        shutil.rmtree(td)
def test_simple():
    """Build or open the ``noaa_data`` table, print two statistics, run selects.

    When ``./noaa_data`` does not exist, a table is created there and filled
    with ``adapter[:]``; otherwise ``'ctable://noaa_data'`` is opened.  The
    function then prints ``mean(t, 'f3')`` and ``std(t, 'f2')``, builds two
    selections and indexes the table with the first one.  Nothing is
    asserted or returned.
    """
    storage = './noaa_data'
    if os.path.exists(storage):
        t = open('ctable://noaa_data')
    else:
        p = params(clevel=5, storage=storage)
        t = Table([], dshape='{f0: int, f1:int, f2:int, f3:float}', params=p)

        # TODO: chunkwise copy
        t.append(adapter[:])
        t.commit()

    print('--------------------------------------')
    print("%s %s" % ('mean', mean(t, 'f3')))
    print("%s %s" % ('std', std(t, 'f2')))
    print('--------------------------------------')

    def above_threshold(x):
        return x > 80000

    def first_exceeds_second(x, y):
        return x > y

    qs1 = select(t, above_threshold, 'f0')
    qs2 = select2(t, first_exceeds_second, ['f0', 'f1'])

    result = t[qs1]
def test_simple():
    """Exercise ``mean``, ``std``, ``select`` and ``select2`` on ``noaa_data``.

    The table is created under ``./noaa_data`` from ``adapter[:]`` on the
    first run and reopened through ``'ctable://noaa_data'`` afterwards.
    Statistics are printed between two separator lines; the selections are
    computed and the first one is used to index the table.  The function
    makes no assertions and returns ``None``.
    """
    table_exists = os.path.exists('./noaa_data')
    if not table_exists:
        t = Table([],
                  dshape='{f0: int, f1:int, f2:int, f3:float}',
                  params=params(clevel=5, storage='./noaa_data'))

        # TODO: chunkwise copy
        rows = adapter[:]
        t.append(rows)
        t.commit()
    else:
        t = open('ctable://noaa_data')

    print('--------------------------------------')
    print("%s %s" % ('mean', mean(t, 'f3')))
    print("%s %s" % ('std', std(t, 'f2')))
    print('--------------------------------------')

    qs1 = select(t, lambda value: value > 80000, 'f0')
    qs2 = select2(t, lambda left, right: left > right, ['f0', 'f1'])

    result = t[qs1]
def make_expression(in_memory=False):
    """Build the benchmark expression and the operands it is evaluated on.

    Parameters
    ----------
    in_memory : bool, optional
        When true, every opened array is rebuilt with
        ``blaze.array(array, params=blaze.params(clevel=9))``.

    Returns
    -------
    (expr, operands)
        ``expr`` is ``(x + y).dot(a*z + b*w)`` over named terminals and
        ``operands`` maps ``'x', 'y', 'z', 'w'`` to the first four arrays
        opened from ``_persistent_array_names`` and ``'a', 'b'`` to ``2.0``.
    """
    T = Terminal
    left = T('x') + T('y')
    right = T('a') * T('z') + T('b') * T('w')
    expr = left.dot(right)

    print('opening blaze arrays...')
    arrays = [blaze.open(name) for name in _persistent_array_names]

    if in_memory:
        print('getting an in-memory version of blaze arrays...')
        t0 = time()
        params = blaze.params(clevel=9)
        arrays = [blaze.array(array, params=params) for array in arrays]
        print("conversion to blaze in-memory: %.3f" % (time() - t0))

    print("%s %s" % ('datashape is:', arrays[0].datashape))

    operands = {}
    for position, key in enumerate(('x', 'y', 'z', 'w')):
        operands[key] = arrays[position]
    operands['a'] = 2.0
    operands['b'] = 2.0
    return expr, operands
def make_expression(in_memory=False):
    """Return the dot-product expression together with its variable bindings.

    The arrays named in ``_persistent_array_names`` are opened with
    ``blaze.open``; with ``in_memory`` set they are additionally converted
    through ``blaze.array`` using ``clevel=9`` parameters (the conversion is
    timed and reported).  The first four arrays are bound to ``x, y, z, w``
    and the scalars ``a`` and ``b`` are both ``2.0``.

    Returns
    -------
    tuple
        ``(expr, bindings)`` where ``bindings`` is a dict keyed by the
        terminal names used in ``expr``.
    """
    T = Terminal
    expr = (T('x') + T('y')).dot(T('a') * T('z') + T('b') * T('w'))

    print('opening blaze arrays...')
    arrays = [blaze.open(array_name) for array_name in _persistent_array_names]

    if in_memory:
        print('getting an in-memory version of blaze arrays...')
        t0 = time()
        params = blaze.params(clevel=9)
        converted = []
        for array in arrays:
            converted.append(blaze.array(array, params=params))
        arrays = converted
        print("conversion to blaze in-memory: %.3f" % (time() - t0))

    print("%s %s" % ('datashape is:', arrays[0].datashape))

    x_arr, y_arr, z_arr, w_arr = arrays[0], arrays[1], arrays[2], arrays[3]
    bindings = {
        'x': x_arr,
        'y': y_arr,
        'z': z_arr,
        'w': w_arr,
        'a': 2.0,
        'b': 2.0,
    }
    return expr, bindings
def convert(filetxt, storage):
    """Load a text file into a persistent blaze array unless it already exists.

    Parameters
    ----------
    filetxt : str
        Path handed to ``np.loadtxt``.
    storage : str
        Destination path; when it already exists nothing is done.

    Returns
    -------
    None
    """
    import os.path

    if os.path.exists(storage):
        # Already converted: leave the existing storage untouched.
        return

    values = np.loadtxt(filetxt)
    blaze.Array(values, params=blaze.params(storage=storage))
# NOTE(review): the statements down to ``return filtered`` look like the tail
# of a function whose ``def`` line lies outside this chunk (they use ``X``,
# ``window_size`` and ``return``); they are reproduced unchanged, indented as
# a function body.
    # Start from a clean on-disk output directory.
    if os.path.exists('result.blz'): shutil.rmtree('result.blz')
    # Empty persistent output array sharing the input's datashape.
    filtered = blz.array([], dshape=X.datashape,
                         params=blz.params(storage='result.blz'))
    # starts[i] is the first index of the window that ends at position i:
    # 0 for the first ``window_size`` positions, then 1, 2, ...
    starts = window_size * [0] + range(1, NROWS - window_size + 1)
    for i in range(NROWS):
        start = starts[i]
        # Mean of the elements from ``start`` through ``i`` inclusive.
        partial = (X[start:i + 1]).mean()
        filtered.append([partial])
    filtered.commit()

    return filtered


# Script entry point: run the numpy and blaze sliding-window implementations
# on the same random data and compare their results.
if __name__ == '__main__':
    # NROWS samples drawn from a normal distribution (mean 0, std 1).
    X = np.random.normal(0, 1, NROWS)
    #X = np.linspace(0, 1, NROWS)

    start = time.time()
    # ``sliding_window_numpy`` is defined outside this chunk.
    result_numpy = sliding_window_numpy(X, WINDOW_SIZE)
    if timing: print 'numpy', time.time() - start

    # Persist the input so the blaze version can open it by path.
    if os.path.exists('Xp.blz'): shutil.rmtree('Xp.blz')
    Xp = blz.array(X, params=blz.params(storage='Xp.blz'))

    # Only the sliding-window call itself is timed, not the array creation.
    start = time.time()
    result_blaze = sliding_window_blz('Xp.blz', WINDOW_SIZE)
    if timing: print 'blaze', time.time() - start

    print "numpy result", result_numpy
    print "blaze result", result_blaze
    # ``result_blaze[:]`` is sliced before being handed to numpy.
    print "allclose?", np.allclose(result_numpy, result_blaze[:])
def test_ones():
    """Smoke test: call ``blaze.ones`` for ``'10, float64'`` with ``clevel=5``.

    Nothing is asserted; the test passes when the call does not raise.
    """
    from blaze import ones, params

    compression = params(clevel=5)
    a = ones('10, float64', params=compression)
def test_fromiter():
    """Smoke test: call ``blaze.fromiter`` on ``xrange(10)`` as ``'x, float64'``.

    Nothing is asserted; the test passes when the call does not raise.
    """
    from blaze import fromiter, params

    source = xrange(10)
    compression = params(clevel=5)
    a = fromiter(source, 'x, float64', params=compression)
# NOTE(review): the statements down to ``return filtered`` look like the tail
# of a function whose ``def`` line lies outside this chunk (they use ``X``,
# ``window_size`` and ``return``); they are reproduced unchanged, indented as
# a function body.
    # Empty persistent output array sharing the input's datashape.
    filtered = blz.array([], dshape=X.datashape,
                         params=blz.params(storage='result.blz'))
    # starts[i] is the first index of the window that ends at position i:
    # 0 for the first ``window_size`` positions, then 1, 2, ...
    starts = window_size * [0] + range(1, NROWS - window_size + 1)
    for i in range(NROWS):
        start = starts[i]
        # Mean of the elements from ``start`` through ``i`` inclusive.
        partial = (X[start:i + 1]).mean()
        filtered.append([partial])
    filtered.commit()

    return filtered


# Script entry point: run the numpy and blaze sliding-window implementations
# on the same random data and compare their results.
if __name__ == '__main__':
    # NROWS samples drawn from a normal distribution (mean 0, std 1).
    X = np.random.normal(0, 1, NROWS)
    #X = np.linspace(0, 1, NROWS)

    start = time.time()
    # ``sliding_window_numpy`` is defined outside this chunk.
    result_numpy = sliding_window_numpy(X, WINDOW_SIZE)
    if timing: print 'numpy', time.time() - start

    # Persist the input so the blaze version can open it by path.
    if os.path.exists('Xp.blz'): shutil.rmtree('Xp.blz')
    Xp = blz.array(X, params=blz.params(storage='Xp.blz'))

    # Only the sliding-window call itself is timed, not the array creation.
    start = time.time()
    result_blaze = sliding_window_blz('Xp.blz', WINDOW_SIZE)
    if timing: print 'blaze', time.time() - start

    print "numpy result", result_numpy
    print "blaze result", result_blaze
    # ``result_blaze[:]`` is sliced before being handed to numpy.
    print "allclose?", np.allclose(result_numpy, result_blaze[:])
def _create_persistent_array(name, n, clevel=9):
    """Create a blaze array from ``n`` stored under ``name``.

    Parameters
    ----------
    name : str
        Storage location; also shown in the progress message.
    n : object
        Passed unchanged as the first argument of ``blaze.array``.
    clevel : int, optional
        Compression level forwarded to ``blaze.params``.
    """
    message = 'creating ' + name + '...'
    print(message)
    storage_params = blaze.params(storage=name, clevel=clevel)
    blaze.array(n, params=storage_params)
# ``os`` is needed for the ``os.path.exists`` check below.
import os
import shutil

"""
Benchmark that compares the storing of objects in both Blaze and PyTables
"""

from time import time
import blaze
import tables

# Number of objects written and read back.
N = 500

# Start from a clean storage directory.
if os.path.exists('c'):
    shutil.rmtree('c')

# Write phase: append N unicode strings of growing length (N*i characters).
t0 = time()
c = blaze.Array([], 'x, object', params=blaze.params(storage='c', clevel=5))
for i in xrange(N):
    c.append(u"s"*N*i)
c.commit()
print("time taken for writing in Blaze: %.3f" % (time() - t0))

# Read phase: reopen the array from disk and accumulate the item lengths.
t0 = time()
c2 = blaze.open('c')
#c2 = c
#print c2.datashape

tlen = 0
for i in range(N):
    #print "i:", i, repr(c2[i]), type(c2[i])
    # ``[()]`` indexes the item returned by ``c2[i]`` with an empty tuple
    # before its length is taken.
    tlen += len(c2[i][()])
def _create_persistent_array(name, n, clevel=9):
    """Persist ``blaze.array(n)`` at ``name`` after announcing it.

    Parameters
    ----------
    name : str
        Storage location for the new array; echoed in the printed message.
    n : object
        First argument given to ``blaze.array``.
    clevel : int, optional
        Compression level used for ``blaze.params`` (default 9).
    """
    print("creating " + name + "...")
    creation_params = blaze.params(storage=name, clevel=clevel)
    blaze.array(n, params=creation_params)
# ``os`` is needed for the ``os.path.exists`` check below.
import os
import shutil

"""
Benchmark that compares the storing of objects in both Blaze and PyTables
"""

from time import time
import blaze
import tables

# Number of objects written and read back.
N = 500

# Remove the storage directory left by a previous run, if any.
if os.path.exists('c'):
    shutil.rmtree('c')

# Write phase: append N unicode strings whose length is N * i characters.
t0 = time()
c = blaze.Array([], 'x, object', params=blaze.params(storage='c', clevel=5))
for i in xrange(N):
    c.append(u"s" * N * i)
c.commit()
print("time taken for writing in Blaze: %.3f" % (time() - t0))

# Read phase: reopen the array from disk and sum the length of every item.
t0 = time()
c2 = blaze.open('c')
#c2 = c
#print c2.datashape

tlen = 0
for i in range(N):
    #print "i:", i, repr(c2[i]), type(c2[i])
    # ``[()]`` indexes the item returned by ``c2[i]`` with an empty tuple
    # before its length is taken.
    tlen += len(c2[i][()])
def _create_persistent_array(name, dshape):
    """Create a persistent ``blaze.ones`` array with compression level 0.

    Parameters
    ----------
    name : str
        Storage location for the new array; echoed in the printed message.
    dshape : object
        Datashape passed unchanged to ``blaze.ones``.
    """
    message = 'creating ' + name + '...'
    print(message)
    uncompressed = blaze.params(storage=name, clevel=0)
    blaze.ones(dshape, params=uncompressed)
import os
import shutil
import math
from time import time
import blaze as blz
from blaze.ts.ucr_dtw import ucr
import numpy as np

# Total number of samples in the time series.
NROWS = int(1e6)
# Number of chunks the series is generated in.
NPERIODS = int(1e4)
# Samples per chunk.  Floor division keeps this an int (it is used as the
# ``num`` argument of ``np.linspace``) even when ``/`` is true division.
CS = NROWS // NPERIODS

timing = True

t0 = time()
# Create a large time series dataset:
if os.path.exists('ts.blz'):
    shutil.rmtree('ts.blz')
ts = blz.array([], 'x, float64', params=blz.params(storage='ts.blz'))
for i in range(NPERIODS):
    # Proceed to fill the empty array in chunks
    x = np.linspace(i*math.pi, (i+1)*math.pi, CS)
    ts.append(x*np.sin(x))
ts.commit()

# Create a dataset to query
if os.path.exists('query.blz'):
    shutil.rmtree('query.blz')
# Same formula as the chunk generated for i == 3 above.
xq = np.linspace(3*math.pi, 4*math.pi, CS)
query = blz.array(xq*np.sin(xq), params=blz.params(storage='query.blz'))

if os.path.exists('query2.blz'):
    shutil.rmtree('query2.blz')
n = np.random.randn(query.size)*.1  # introduce some noise
query2 = blz.array(xq*np.sin(xq)+n, params=blz.params(storage='query2.blz'))

if timing:
    print("%s %s" % ("Total Blaze arrays create time :", round(time()-t0, 4)))
# Script for benchmarking OOC matrix matrix multiplication (only 2D supported)

import shutil, os.path
from time import time
import blaze
from linalg import dot

# Remove pre-existent data directories
for d in ('a', 'b', 'out'):
    if not os.path.exists(d):
        continue
    shutil.rmtree(d)

# Create simple inputs: a is 2000 x 2000 and b is 2000 x 3000, both stored
# on disk under their own name.
t0 = time()
a = blaze.ones(blaze.dshape('2000, 2000, float64'),
               params=blaze.params(storage='a'))
print("%s %s" % ("Time for matrix a creation : ", round(time()-t0, 3)))

t0 = time()
b = blaze.ones(blaze.dshape('2000, 3000, float64'),
               params=blaze.params(storage='b'))
print("%s %s" % ("Time for matrix b creation : ", round(time()-t0, 3)))

# Do the dot product; the result is stored under 'out'.
t0 = time()
out = dot(a, b, outname='out')
print("%s %s" % ("Time for ooc matmul : ", round(time()-t0, 3)))

print("%s %s" % ("out:", out))
def dot(a, b, out=None, outname='out'):
    """
    Blocked matrix multiplication of two 2-D arrays.

    The product is accumulated block by block so that only sub-blocks of
    `a`, `b` and `out` are sliced at any one time; the block edge is the
    largest power of two whose square fits in ``OOC_BUFFER_SIZE`` bytes
    for the output item size.

    Parameters
    ----------
    a : array
        First argument; must be 2-D.
    b : array
        Second argument; must be 2-D.
    out : array, optional
        Output argument.  Its shape must be ``(a.shape[0], b.shape[1])``.
        The blocks are accumulated into it with ``+=``, so it is expected
        to start out filled with zeros.
    outname : str, optional
        If provided this will be the name for the output matrix storage.
        This parameter is only used when `out` is not provided.

    Returns
    -------
    output : array
        The dot product of `a` and `b`.  If `out` is given, then it is
        returned; otherwise a new zero-initialised array with the dtype of
        `a`, stored under `outname`, is created and returned.

    Raises
    ------
    ValueError
        If `a` or `b` is not 2-D, if the last dimension of `a` is not the
        same size as the first dimension of `b`, or if `out` is given but
        does not have the shape of the result.

    """
    # The last datashape parameter is the item type; the ones before it are
    # the dimensions.
    a_shape = tuple(i.val for i in a.datashape.parameters[:-1])
    b_shape = tuple(i.val for i in b.datashape.parameters[:-1])

    if len(a_shape) != 2 or len(b_shape) != 2:
        raise ValueError("only 2-D matrices supported")

    if a_shape[1] != b_shape[0]:
        raise ValueError(
            "last dimension of `a` does not match first dimension of `b`")

    l, m, n = a_shape[0], a_shape[1], b_shape[1]

    # Compare against None explicitly: the truth value of an array object
    # says nothing about whether the caller supplied one.
    if out is not None:
        out_shape = tuple(i.val for i in out.datashape.parameters[:-1])
        if out_shape != (l, n):
            raise ValueError("`out` array does not have the correct shape")
    else:
        parms = blaze.params(clevel=5, storage=outname)
        a_dtype = a.datashape.parameters[-1].to_dtype()
        dshape = blaze.dshape('%d, %d, %s' % (l, n, a_dtype))
        out = blaze.zeros(dshape, parms)

    # Compute a good block size
    out_dtype = out.datashape.parameters[-1].to_dtype()
    bl = math.sqrt(OOC_BUFFER_SIZE / out_dtype.itemsize)
    bl = 2**int(math.log(bl, 2))

    for i in range(0, l, bl):
        i_end = min(i+bl, l)
        for j in range(0, n, bl):
            j_end = min(j+bl, n)
            for k in range(0, m, bl):
                k_end = min(k+bl, m)
                a0 = a[i:i_end, k:k_end]
                b0 = b[k:k_end, j:j_end]
                # Accumulate the partial product of this (i, j) block.
                out[i:i_end, j:j_end] += np.dot(a0, b0)

    return out
# Script for benchmarking OOC matrix matrix multiplication (only 2D supported)

import shutil, os.path
from time import time
import blaze
from linalg import dot

# Remove pre-existent data directories
for d in ('a', 'b', 'out'):
    if os.path.exists(d):
        shutil.rmtree(d)

# Create simple inputs (each one persisted under its own directory name)
t0 = time()
a = blaze.ones(
    blaze.dshape('2000, 2000, float64'),
    params=blaze.params(storage='a'),
)
print("%s %s" % ("Time for matrix a creation : ", round(time() - t0, 3)))

t0 = time()
b = blaze.ones(
    blaze.dshape('2000, 3000, float64'),
    params=blaze.params(storage='b'),
)
print("%s %s" % ("Time for matrix b creation : ", round(time() - t0, 3)))

# Do the dot product, storing the result under 'out'
t0 = time()
out = dot(a, b, outname='out')
print("%s %s" % ("Time for ooc matmul : ", round(time() - t0, 3)))

print("%s %s" % ("out:", out))