def time_concatenate(self):
    """Run the concatenation benchmark selected by ``Suite.style``.

    All state is read from (and the result written to) attributes of the
    ``Suite`` class; ``self`` is not used.  The chosen operation is repeated
    ``Suite.T`` times and the last result is stored in ``Suite.r``.  An
    unrecognised style does nothing.
    """
    style = Suite.style

    if style == 'numpy':
        for _ in xrange(Suite.T):
            Suite.r = numpy.concatenate(Suite.a, 0)
        return

    if style == 'concat':
        for _ in xrange(Suite.T):
            Suite.r = concat(Suite.a)
        return

    if style == 'bcolz':
        for _ in xrange(Suite.T):
            Suite.r = append(Suite.a, Suite.clevel)
def test_numpy():
    """Benchmark table creation and querying with a plain NumPy array.

    Builds an ``NR``-row array of dtype ``dt``, selects rows with the
    mask produced by evaluating ``nquery`` and returns the ``f1``/``f3``
    values of the selected rows as an ``"f8,f8"`` structured array.
    """
    enter()
    row_source = (mv + np.random.rand(NC) - mv for i in xrange(int(NR)))
    # NOTE: the local must stay named `t`; `nquery` is evaluated in this
    # frame and may refer to it by name.
    t = np.fromiter(row_source, dtype=dt)
    after_create()

    mask = eval(nquery)
    picked = ((row['f1'], row['f3']) for row in t[mask])
    out = np.fromiter(picked, dtype="f8,f8")
    after_query()
    return out
def test02(self):
    """Testing `fetchwhere` method with a `outcols` with 1 field

    Rows are ``(i, i, 3 * i)``, so ``f1 < f2`` holds for every row except
    ``i == 0``; only column ``f1`` is requested in the output.
    """
    N = self.N
    rows = ((i, i, i * 3) for i in xrange(N))
    ra = np.fromiter(rows, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    ct = t.fetchwhere('f1 < f2', outcols=('f1',))
    self.assertEqual(ct.names, ['f1'])
    nrows, total = len(ct), ct['f1'].sum()
    self.assertEqual(nrows, N - 1)
    # Gauss summation formula.  N * (N - 1) is always even, so floor
    # division is exact for any N on both Python 2 and Python 3
    # (the former `N / 2` floored on Python 2 when N was odd).
    self.assertEqual(total, N * (N - 1) // 2)
def test00(self):
    """Testing `fetchwhere` method with only an expression

    Rows are ``(i, 2 * i, 3 * i)``, so ``f1 < f2`` holds for every row
    except ``i == 0``.
    """
    N = self.N
    rows = ((i, i * 2., i * 3) for i in xrange(N))
    ra = np.fromiter(rows, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    ct = t.fetchwhere('f1 < f2')
    nrows, total = len(ct), ct['f0'].sum()
    self.assertEqual(nrows, N - 1)
    # Gauss summation formula.  N * (N - 1) is always even, so floor
    # division is exact for any N on both Python 2 and Python 3
    # (the former `N / 2` floored on Python 2 when N was odd).
    self.assertEqual(total, N * (N - 1) // 2)
def test04(self):
    """Testing `fetchwhere` method with an `out_flavor` parameter

    With ``out_flavor="numpy"`` the result must be a ``np.ndarray``.
    Rows are ``(i, 2 * i, 3 * i)``, so ``f1 < f2`` holds for every row
    except ``i == 0``.
    """
    N = self.N
    rows = ((i, i * 2., i * 3) for i in xrange(N))
    ra = np.fromiter(rows, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    ct = t.fetchwhere('f1 < f2', out_flavor="numpy")
    self.assertEqual(type(ct), np.ndarray)
    nrows, total = len(ct), ct['f0'].sum()
    self.assertEqual(nrows, N - 1)
    # Gauss summation formula.  N * (N - 1) is always even, so floor
    # division is exact for any N on both Python 2 and Python 3
    # (the former `N / 2` floored on Python 2 when N was odd).
    self.assertEqual(total, N * (N - 1) // 2)
def test03(self):
    """Testing `fetchwhere` method with a `limit`, `skip` parameter

    Skips the first ``M`` matching rows and then fetches at most
    ``N - M - 2`` of the remaining ones.
    """
    N, M = self.N, 101
    source = ((i, i * 2., i * 3) for i in xrange(N))
    ra = np.fromiter(source, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    wanted = N - M - 2
    ct = t.fetchwhere('f1 < f2', limit=wanted, skip=M)
    count = len(ct)
    f0_sum = ct['f0'].sum()
    self.assertEqual(count, wanted)
    self.assertEqual(f0_sum, np.arange(M + 1, N - 1).sum())
def test05(self):
    """Testing `fetchwhere` method with global and local variables

    The expression references both the module-level ``GVAR`` and the
    local ``lvar`` by name; since both hold the same value they cancel
    out and the filter is equivalent to ``f1 < f2``.
    """
    N = self.N
    # Must keep this exact name: it is referenced inside the expression.
    lvar = GVAR
    rows = ((i, i * 2., i * 3) for i in xrange(N))
    ra = np.fromiter(rows, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    ct = t.fetchwhere('(f1 + lvar) < (f2 + GVAR)', out_flavor="numpy")
    self.assertEqual(type(ct), np.ndarray)
    nrows, total = len(ct), ct['f0'].sum()
    self.assertEqual(nrows, N - 1)
    # Gauss summation formula.  N * (N - 1) is always even, so floor
    # division is exact for any N on both Python 2 and Python 3
    # (the former `N / 2` floored on Python 2 when N was odd).
    self.assertEqual(total, N * (N - 1) // 2)
def test00(self):
    """Testing `whereblocks` method with only an expression

    Rows are ``(i, 2 * i, 3 * i)``, so ``f1 < f2`` holds for every row
    except ``i == 0``; lengths and ``f0`` sums are accumulated over all
    yielded blocks.
    """
    N = self.N
    rows = ((i, i * 2., i * 3) for i in xrange(N))
    ra = np.fromiter(rows, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    nrows, total = 0, 0
    for block in t.whereblocks('f1 < f2'):
        nrows += len(block)
        total += block['f0'].sum()
    self.assertEqual(nrows, N - 1)
    # Gauss summation formula.  N * (N - 1) is always even, so floor
    # division is exact for any N on both Python 2 and Python 3
    # (the former `N / 2` floored on Python 2 when N was odd).
    self.assertEqual(total, N * (N - 1) // 2)
def test03(self):
    """Testing `whereblocks` method with a `outfields` with 1 field

    Rows are ``(i, i, 3 * i)``, so ``f1 < f2`` holds for every row except
    ``i == 0``; every yielded block must expose only the ``f1`` field.
    """
    N = self.N
    rows = ((i, i, i * 3) for i in xrange(N))
    ra = np.fromiter(rows, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    nrows, total = 0, 0
    for block in t.whereblocks('f1 < f2', outfields=('f1', )):
        self.assertEqual(block.dtype.names, ('f1', ))
        nrows += len(block)
        total += block['f1'].sum()
    self.assertEqual(nrows, N - 1)
    # Gauss summation formula.  N * (N - 1) is always even, so floor
    # division is exact for any N on both Python 2 and Python 3
    # (the former `N / 2` floored on Python 2 when N was odd).
    self.assertEqual(total, N * (N - 1) // 2)
def test07(self):
    """Testing `whereblocks` method with a `limit`, `skip` parameter

    Skips the first ``M`` matching rows, then accumulates at most
    ``N - M - 2`` rows over the yielded blocks.
    """
    N, M = self.N, 101
    source = ((i, i * 2., i * 3) for i in xrange(N))
    ra = np.fromiter(source, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    wanted = N - M - 2
    count = 0
    f0_sum = 0
    for chunk in t.whereblocks('f1 < f2', limit=wanted, skip=M):
        count += len(chunk)
        f0_sum += chunk['f0'].sum()
    self.assertEqual(count, wanted)
    self.assertEqual(f0_sum, np.arange(M + 1, N - 1).sum())
def test05(self):
    """Testing `whereblocks` method with a `limit` parameter

    Only the first ``M`` matching rows may be yielded across all blocks.
    """
    N, M = self.N, 101
    source = ((i, i * 2., i * 3) for i in xrange(N))
    ra = np.fromiter(source, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    count = 0
    f0_sum = 0
    for chunk in t.whereblocks('f1 < f2', limit=M):
        count += len(chunk)
        f0_sum += chunk['f0'].sum()
    self.assertEqual(count, M)
    # Gauss summation formula
    expected_sum = M * ((M + 1) / 2)
    self.assertEqual(f0_sum, expected_sum)
def test03(self):
    """Testing `whereblocks` method with a `outfields` with 1 field

    Rows are ``(i, i, 3 * i)``, so ``f1 < f2`` holds for every row except
    ``i == 0``; every yielded block must expose only the ``f1`` field.
    """
    N = self.N
    rows = ((i, i, i * 3) for i in xrange(N))
    ra = np.fromiter(rows, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    nrows, total = 0, 0
    for block in t.whereblocks('f1 < f2', outfields=('f1',)):
        self.assertEqual(block.dtype.names, ('f1',))
        nrows += len(block)
        total += block['f1'].sum()
    self.assertEqual(nrows, N - 1)
    # Gauss summation formula.  N * (N - 1) is always even, so floor
    # division is exact for any N on both Python 2 and Python 3
    # (the former `N / 2` floored on Python 2 when N was odd).
    self.assertEqual(total, N * (N - 1) // 2)
def test_ctable(clevel):
    """Benchmark table creation and querying with a bcolz ctable.

    Builds an ``NR``-row ctable compressed at level ``clevel`` (codec
    ``cname``), runs ``squery`` through ``where`` asking for columns
    ``f1`` and ``f3`` and returns the hits as an ``"f8,f8"`` array.
    """
    enter()
    row_source = (mv + np.random.rand(NC) - mv for i in xrange(int(NR)))
    tc = bcolz.fromiter(
        row_source,
        dtype=dt,
        cparams=bcolz.cparams(clevel, cname=cname),
        count=int(NR))
    after_create()

    hits = (row for row in tc.where(squery, 'f1,f3'))
    out = np.fromiter(hits, dtype="f8,f8")
    after_query()
    return out
def test08(self):
    """Testing `whereblocks` method with global and local variables

    The expression references both the module-level ``GVAR`` and the
    local ``lvar`` by name; since both hold the same value they cancel
    out and the filter is equivalent to ``f1 < f2``.
    """
    N = self.N
    # Must keep this exact name: it is referenced inside the expression.
    lvar = GVAR
    rows = ((i, i * 2., i * 3) for i in xrange(N))
    ra = np.fromiter(rows, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    nrows, total = 0, 0
    for block in t.whereblocks('(f1 + lvar) < (f2 + GVAR)'):
        nrows += len(block)
        total += block['f0'].sum()
    self.assertEqual(nrows, N - 1)
    # Gauss summation formula.  N * (N - 1) is always even, so floor
    # division is exact for any N on both Python 2 and Python 3
    # (the former `N / 2` floored on Python 2 when N was odd).
    self.assertEqual(total, N * (N - 1) // 2)
def test_numexpr():
    """Benchmark querying a NumPy structured array through numexpr.

    Builds an ``NR``-row array of dtype ``dt``, evaluates ``squery`` with
    numexpr over the per-column views and returns the ``f1``/``f3``
    values of the selected rows as an ``"f8,f8"`` structured array.
    """
    import numexpr as ne

    enter()
    row_source = (mv + np.random.rand(NC) - mv for i in xrange(int(NR)))
    t = np.fromiter(row_source, dtype=dt)
    after_create()

    # Expose every column to numexpr under its field name (f0, f1, ...).
    map_field = {"f%s" % i: t["f%s" % i] for i in range(NC)}
    mask = ne.evaluate(squery, map_field)
    picked = ((row['f1'], row['f3']) for row in t[mask])
    out = np.fromiter(picked, dtype="f8,f8")
    after_query()
    return out
def test_numexpr():
    """Benchmark querying a NumPy structured array through numexpr.

    Builds an ``NR``-row array of dtype ``dt``, evaluates ``squery`` with
    numexpr over the per-column views and returns the ``f1``/``f3``
    values of the selected rows as an ``"f8,f8"`` structured array.
    """
    import numexpr as ne

    enter()
    row_source = (mv + np.random.rand(NC) - mv for i in xrange(int(NR)))
    t = np.fromiter(row_source, dtype=dt)
    after_create()

    # Expose every column to numexpr under its field name (f0, f1, ...).
    map_field = {"f%s" % i: t["f%s" % i] for i in range(NC)}
    mask = ne.evaluate(squery, map_field)
    picked = ((row['f1'], row['f3']) for row in t[mask])
    out = np.fromiter(picked, dtype="f8,f8")
    after_query()
    return out
def test01(self):
    """Testing `whereblocks` method with a `blen`

    Rows are ``(i, 2 * i, 3 * i)``, so ``f0 <= f1`` holds for every row
    and all ``N`` rows must come back, in blocks of at most 100 rows.
    """
    N = self.N
    rows = ((i, i * 2., i * 3) for i in xrange(N))
    ra = np.fromiter(rows, dtype='i4,f8,i8')
    t = bcolz.ctable(ra)
    nrows, total = 0, 0
    for block in t.whereblocks('f0 <= f1', blen=100):
        nrows += len(block)
        # All blocks should be of length 100, except the last one,
        # which should be 0 or 20
        self.assertTrue(len(block) in (0, 20, 100))
        total += block['f0'].sum()
    self.assertEqual(nrows, N)
    # Gauss summation formula.  N * (N - 1) is always even, so floor
    # division is exact for any N on both Python 2 and Python 3
    # (the former `N / 2` floored on Python 2 when N was odd).
    self.assertEqual(total, N * (N - 1) // 2)
def test_sqlite():
    """Benchmark the same query against an in-memory SQLite table.

    Creates a ``bench`` table with ``NC`` real columns, inserts ``NR``
    random rows, runs the query once without indexes and once after
    creating two indexes, and returns the result of the indexed run as
    an ``"f8,f8"`` structured array.

    The connection is closed before returning (even on error) so the
    in-memory database does not outlive the benchmark.
    """
    enter()
    sqlquery = "(f2>.9) and ((f8>.3) and (f8<.4))"  # the query
    select = "select f1, f3 from bench where %s" % sqlquery

    con = sqlite3.connect(":memory:")
    try:
        # Create table
        fields = "(%s)" % ",".join(["f%d real" % i for i in range(NC)])
        con.execute("create table bench %s" % fields)

        # Insert NR rows of data (one "?" placeholder per column)
        vals = "(%s)" % ",".join(["?"] * NC)
        with con:
            con.executemany(
                "insert into bench values %s" % vals,
                (mv + np.random.rand(NC) - mv for i in xrange(int(NR))))
        after_create()

        out = np.fromiter(con.execute(select), dtype="f8,f8")
        after_query("non-indexed")

        # Create indexes
        # NOTE(review): the query filters on f2 and f8, but the indexes
        # cover f1 and f8 (the one named `f2idx` is on f8) -- confirm
        # whether an index on f2 was intended.
        con.execute("CREATE INDEX f1idx ON bench (f1)")
        con.execute("CREATE INDEX f2idx ON bench (f8)")
        after_create("index")

        out = np.fromiter(con.execute(select), dtype="f8,f8")
        after_query("indexed")
    finally:
        # The results are already materialized in `out`.
        con.close()
    return out
def test_sqlite():
    """Benchmark the same query against an in-memory SQLite table.

    Creates a ``bench`` table with ``NC`` real columns, inserts ``NR``
    random rows, runs the query once without indexes and once after
    creating two indexes, and returns the result of the indexed run as
    an ``"f8,f8"`` structured array.

    The connection is closed before returning (even on error) so the
    in-memory database does not outlive the benchmark.
    """
    enter()
    sqlquery = "(f2>.9) and ((f8>.3) and (f8<.4))"  # the query
    select = "select f1, f3 from bench where %s" % sqlquery

    con = sqlite3.connect(":memory:")
    try:
        # Create table
        fields = "(%s)" % ",".join(["f%d real" % i for i in range(NC)])
        con.execute("create table bench %s" % fields)

        # Insert NR rows of data (one "?" placeholder per column)
        vals = "(%s)" % ",".join(["?"] * NC)
        with con:
            con.executemany(
                "insert into bench values %s" % vals,
                (mv + np.random.rand(NC) - mv for i in xrange(int(NR))))
        after_create()

        out = np.fromiter(con.execute(select), dtype="f8,f8")
        after_query("non-indexed")

        # Create indexes
        # NOTE(review): the query filters on f2 and f8, but the indexes
        # cover f1 and f8 (the one named `f2idx` is on f8) -- confirm
        # whether an index on f2 was intended.
        con.execute("CREATE INDEX f1idx ON bench (f1)")
        con.execute("CREATE INDEX f2idx ON bench (f8)")
        after_create("index")

        out = np.fromiter(con.execute(select), dtype="f8,f8")
        after_query("indexed")
    finally:
        # The results are already materialized in `out`.
        con.close()
    return out
def _eval_blocks(expression, vars, vlen, typesize, vm, out_flavor, blen,
                 **kwargs):
    """Perform the evaluation in blocks.

    Parameters
    ----------
    expression
        Expression handed to `_eval` (python/dask) or to numexpr.
    vars : dict
        Maps operand names to values.  Sequence-like operands longer than
        `blen` are fed to the evaluator one block at a time.
    vlen : int
        Total number of leading-axis elements to iterate over.
    typesize : int
        Divisor applied to the per-VM byte budget to get a default `blen`.
    vm : str
        "python", "numexpr" or "dask".
    out_flavor : str
        "bcolz" or "carray" yield a bcolz container; anything else yields
        a NumPy array.
    blen : int
        Block length in elements; a falsy value requests the default.
    **kwargs
        Forwarded to `bcolz.zeros` / `bcolz.carray` ('expectedlen' is
        popped and used as the carray's expected length).

    Returns
    -------
    A bcolz carray, a NumPy array, or (for reductions that produce a 0-dim
    result) the scalar extracted with ``result[()]``.

    NOTE(review): if `vlen` is 0 the block loop never runs, so `result`
    and `scalar` are never bound before the final statements -- confirm
    callers never pass an empty length.
    """

    if not blen:
        # Compute the optimal block size (in elements)
        # The next is based on experiments with bench/ctable-query.py
        # and the 'movielens-bench' repository
        if vm == "numexpr":
            bsize = 2**23
        elif vm == "dask":
            bsize = 2**25
        else:  # python
            bsize = 2**21
        blen = int(bsize / typesize)
        # Protection against too large atomsizes
        if blen == 0:
            blen = 1

    if vm == "dask":
        # The dask path builds one lazy graph and returns from here; the
        # manual block loop below is not used.
        if 'da' in vars:
            raise NameError("'da' is reserved as a prefix for dask.array. "
                            "Please use another prefix")
        # NOTE: this rebinds entries of the `vars` dict that was passed in.
        for name in vars:
            var = vars[name]
            if is_sequence_like(var):
                vars[name] = da.from_array(var, chunks=(blen, ) + var.shape[1:])
        # Build the expression graph
        vars['da'] = da
        da_expr = _eval(expression, vars)
        if out_flavor in ("bcolz", "carray") and da_expr.shape:
            result = bcolz.zeros(da_expr.shape, da_expr.dtype, **kwargs)
            # Store while compute expression graph
            da.store(da_expr, result)
            return result
        else:
            # Store while compute
            return np.array(da_expr)

    # Check whether we have a re_evaluate() function in numexpr
    re_evaluate = bcolz.numexpr_here and hasattr(bcolz.numexpr, "re_evaluate")

    vars_ = {}
    # Get containers for vars
    # (also record the largest operand rank, used below to detect
    # reductions that drop dimensions)
    maxndims = 0
    for name in vars:
        var = vars[name]
        if is_sequence_like(var):
            ndims = len(var.shape) + len(var.dtype.shape)
            if ndims > maxndims:
                maxndims = ndims
            if len(var) > blen and hasattr(var, "_getrange"):
                # Preallocate a buffer that `_getrange` fills in place
                shape = (blen, ) + var.shape[1:]
                vars_[name] = np.empty(shape, dtype=var.dtype)

    for i in xrange(0, vlen, blen):
        # Fill buffers for vars
        for name in vars:
            var = vars[name]
            if is_sequence_like(var) and len(var) > blen:
                if hasattr(var, "_getrange"):
                    if i + blen < vlen:
                        var._getrange(i, blen, vars_[name])
                    else:
                        # Last (possibly short) block: plain slice instead
                        # of the fixed-size buffer
                        vars_[name] = var[i:]
                else:
                    vars_[name] = var[i:i + blen]
            else:
                # Operand is not blocked: pass it whole on every iteration
                if hasattr(var, "__getitem__"):
                    vars_[name] = var[:]
                else:
                    vars_[name] = var

        # Perform the evaluation for this block
        if vm == "python":
            res_block = _eval(expression, vars_)
        else:
            if i == 0 or not re_evaluate:
                try:
                    res_block = bcolz.numexpr.evaluate(expression,
                                                       local_dict=vars_)
                except ValueError:
                    # numexpr cannot handle this, so fall back to "python" vm
                    warnings.warn(
                        "numexpr cannot handle this expression: falling back "
                        "to the 'python' virtual machine. You can choose "
                        "another virtual machine by using the `vm` parameter.")
                    return _eval_blocks(expression, vars, vlen, typesize,
                                        "python", out_flavor, blen, **kwargs)
            else:
                # Later blocks go through re_evaluate() with the new
                # operands instead of a full evaluate() call
                res_block = bcolz.numexpr.re_evaluate(local_dict=vars_)

        if i == 0:
            # Detection of reduction operations
            scalar = False
            dim_reduction = False
            if len(res_block.shape) == 0:
                scalar = True
                result = res_block
                continue
            elif len(res_block.shape) < maxndims:
                dim_reduction = True
                result = res_block
                continue
            # Get a decent default for expectedlen
            if out_flavor in ("bcolz", "carray"):
                nrows = kwargs.pop('expectedlen', vlen)
                result = bcolz.carray(res_block, expectedlen=nrows, **kwargs)
            else:
                out_shape = list(res_block.shape)
                out_shape[0] = vlen
                result = np.empty(out_shape, dtype=res_block.dtype)
                result[:blen] = res_block
        else:
            if scalar or dim_reduction:
                # Reductions are combined by adding the per-block results
                result += res_block
            elif out_flavor in ("bcolz", "carray"):
                result.append(res_block)
            else:
                result[i:i + blen] = res_block

    if isinstance(result, bcolz.carray):
        result.flush()
    if scalar:
        return result[()]
    return result
cparams = bcolz.cparams(clevel) # x = np.arange(N) x = np.zeros(N, dtype="f8") y = x.copy() z = x.copy() cx = bcolz.carray(x, cparams=cparams) cy = cx.copy() cz = cx.copy() ct = bcolz.ctable((cx, cy, cz), names=['x', 'y', 'z']) t = ct[:] print("Starting benchmark now for getting %d elements..." % M) # Retrieve from a ndarray t0 = time() vals = [x[i] for i in xrange(0, M, 3)] print("Time for array--> %.3f" % (time() - t0,)) print("vals-->", len(vals)) #bcolz.set_num_threads(bcolz.ncores//2) # Retrieve from a carray t0 = time() cvals = [cx[i] for i in xrange(0, M, 3)] #cvals = cx[:M:3][:].tolist() print("Time for carray--> %.3f" % (time() - t0,)) print("vals-->", len(cvals)) assert vals == cvals # Retrieve from a structured ndarray t0 = time()
# Problem parameters: with a single command-line argument use built-in
# defaults; otherwise read N, K, T from argv[2:5] and an optional
# compression level from argv[5] (0 when absent).
# NOTE(review): `style` is defined outside this fragment -- presumably
# taken from argv[1]; confirm.
if len(sys.argv) == 2:
    N, K, T, clevel = (1000000, 10, 3, 1)
else:
    N, K, T = [int(arg) for arg in sys.argv[2:5]]
    if len(sys.argv) > 5:
        clevel = int(sys.argv[5])
    else:
        clevel = 0

# The next datasets allow for very high compression ratios
# (K identical arrays holding 0..N-1 as float64)
a = [numpy.arange(N, dtype='f8') for _ in range(K)]
print("problem size: (%d) x %d = 10^%g" % (N, K, math.log10(N * K)))

# Time T repetitions of the selected concatenation strategy
t = time.time()
if style == 'numpy':
    for _ in xrange(T):
        r = numpy.concatenate(a, 0)
elif style == 'concat':
    for _ in xrange(T):
        r = concat(a)
elif style == 'bcolz':
    for _ in xrange(T):
        r = append(a, clevel)
t = time.time() - t
# Report the average time per repetition
print('time for concat: %.3fs' % (t / T))

# Size of the result in bytes: `cbytes` for the bcolz result, raw buffer
# size for the others
if style == 'bcolz':
    size = r.cbytes
else:
    size = r.size * r.dtype.itemsize
def _eval_blocks(expression, vars, vlen, typesize, vm, out_flavor, blen,
                 **kwargs):
    """Perform the evaluation in blocks.

    Parameters
    ----------
    expression
        Expression handed to `_eval` (python/dask) or to numexpr.
    vars : dict
        Maps operand names to values.  Sequence-like operands longer than
        `blen` are fed to the evaluator one block at a time.
    vlen : int
        Total number of leading-axis elements to iterate over.
    typesize : int
        Divisor applied to the per-VM byte budget to get a default `blen`.
    vm : str
        "python", "numexpr" or "dask".
    out_flavor : str
        "bcolz" or "carray" yield a bcolz container; anything else yields
        a NumPy array.
    blen : int
        Block length in elements; a falsy value requests the default.
    **kwargs
        Forwarded to `bcolz.zeros` / `bcolz.carray` ('expectedlen' is
        popped and used as the carray's expected length).

    Returns
    -------
    A bcolz carray, a NumPy array, or (for reductions that produce a 0-dim
    result) the scalar extracted with ``result[()]``.

    NOTE(review): if `vlen` is 0 the block loop never runs, so `result`
    and `scalar` are never bound before the final statements -- confirm
    callers never pass an empty length.
    """

    if not blen:
        # Compute the optimal block size (in elements)
        # The next is based on experiments with bench/ctable-query.py
        # and the 'movielens-bench' repository
        if vm == "numexpr":
            bsize = 2**23
        elif vm == "dask":
            bsize = 2**25
        else:  # python
            bsize = 2**21
        blen = int(bsize / typesize)
        # Protection against too large atomsizes
        if blen == 0:
            blen = 1

    if vm == "dask":
        # The dask path builds one lazy graph and returns from here; the
        # manual block loop below is not used.
        if 'da' in vars:
            raise NameError(
                "'da' is reserved as a prefix for dask.array. "
                "Please use another prefix")
        # NOTE: this rebinds entries of the `vars` dict that was passed in.
        for name in vars:
            var = vars[name]
            if is_sequence_like(var):
                vars[name] = da.from_array(var, chunks=(blen,) + var.shape[1:])
        # Build the expression graph
        vars['da'] = da
        da_expr = _eval(expression, vars)
        if out_flavor in ("bcolz", "carray") and da_expr.shape:
            result = bcolz.zeros(da_expr.shape, da_expr.dtype, **kwargs)
            # Store while compute expression graph
            da.store(da_expr, result)
            return result
        else:
            # Store while compute
            return np.array(da_expr)

    # Check whether we have a re_evaluate() function in numexpr
    re_evaluate = bcolz.numexpr_here and hasattr(bcolz.numexpr, "re_evaluate")

    vars_ = {}
    # Get containers for vars
    # (also record the largest operand rank, used below to detect
    # reductions that drop dimensions)
    maxndims = 0
    for name in vars:
        var = vars[name]
        if is_sequence_like(var):
            ndims = len(var.shape) + len(var.dtype.shape)
            if ndims > maxndims:
                maxndims = ndims
            if len(var) > blen and hasattr(var, "_getrange"):
                # Preallocate a buffer that `_getrange` fills in place
                shape = (blen, ) + var.shape[1:]
                vars_[name] = np.empty(shape, dtype=var.dtype)

    for i in xrange(0, vlen, blen):
        # Fill buffers for vars
        for name in vars:
            var = vars[name]
            if is_sequence_like(var) and len(var) > blen:
                if hasattr(var, "_getrange"):
                    if i+blen < vlen:
                        var._getrange(i, blen, vars_[name])
                    else:
                        # Last (possibly short) block: plain slice instead
                        # of the fixed-size buffer
                        vars_[name] = var[i:]
                else:
                    vars_[name] = var[i:i+blen]
            else:
                # Operand is not blocked: pass it whole on every iteration
                if hasattr(var, "__getitem__"):
                    vars_[name] = var[:]
                else:
                    vars_[name] = var

        # Perform the evaluation for this block
        if vm == "python":
            res_block = _eval(expression, vars_)
        else:
            if i == 0 or not re_evaluate:
                try:
                    res_block = bcolz.numexpr.evaluate(expression,
                                                       local_dict=vars_)
                except ValueError:
                    # numexpr cannot handle this, so fall back to "python" vm
                    warnings.warn(
                        "numexpr cannot handle this expression: falling back "
                        "to the 'python' virtual machine. You can choose "
                        "another virtual machine by using the `vm` parameter.")
                    return _eval_blocks(
                        expression, vars, vlen, typesize, "python",
                        out_flavor, blen, **kwargs)
            else:
                # Later blocks go through re_evaluate() with the new
                # operands instead of a full evaluate() call
                res_block = bcolz.numexpr.re_evaluate(local_dict=vars_)

        if i == 0:
            # Detection of reduction operations
            scalar = False
            dim_reduction = False
            if len(res_block.shape) == 0:
                scalar = True
                result = res_block
                continue
            elif len(res_block.shape) < maxndims:
                dim_reduction = True
                result = res_block
                continue
            # Get a decent default for expectedlen
            if out_flavor in ("bcolz", "carray"):
                nrows = kwargs.pop('expectedlen', vlen)
                result = bcolz.carray(res_block, expectedlen=nrows, **kwargs)
            else:
                out_shape = list(res_block.shape)
                out_shape[0] = vlen
                result = np.empty(out_shape, dtype=res_block.dtype)
                result[:blen] = res_block
        else:
            if scalar or dim_reduction:
                # Reductions are combined by adding the per-block results
                result += res_block
            elif out_flavor in ("bcolz", "carray"):
                result.append(res_block)
            else:
                result[i:i+blen] = res_block

    if isinstance(result, bcolz.carray):
        result.flush()
    if scalar:
        return result[()]
    return result
def _eval_blocks(expression, vars, vlen, typesize, vm, out_flavor,
                 **kwargs):
    """Perform the evaluation in blocks.

    Parameters
    ----------
    expression
        Expression handed to `_eval` (python vm) or to numexpr.
    vars : dict
        Maps operand names to values.  Operands that have a length larger
        than the block size are fed to the evaluator one block at a time.
    vlen : int
        Total number of leading-axis elements to iterate over.
    typesize : int
        Divisor applied to the per-VM byte budget to get the block size.
    vm : str
        "numexpr" or "python" (anything other than "python" is evaluated
        through numexpr).
    out_flavor : str
        "carray" yields a bcolz carray; anything else a NumPy array.
    **kwargs
        Forwarded to `bcolz.carray` ('expectedlen' is popped and used as
        the carray's expected length).

    Returns
    -------
    A bcolz carray, a NumPy array, or (for reductions that produce a 0-dim
    result) the scalar extracted with ``result[()]``.

    NOTE(review): if `vlen` is 0 the block loop never runs, so `result`
    and `scalar` are never bound before the final statements -- confirm
    callers never pass an empty length.
    """

    # Compute the optimal block size (in elements)
    # The next is based on experiments with bench/ctable-query.py
    # and the 'movielens-bench' repository
    if vm == "numexpr":
        bsize = 2**24
    else:
        bsize = 2**22
    bsize //= typesize
    # Evaluation seems more efficient if block size is a power of 2.
    # Only round when bsize is positive: a typesize larger than the byte
    # budget leaves bsize == 0, and math.log(0, 2) would raise ValueError
    # before the protection below gets a chance to run.
    if bsize > 0:
        bsize = 2 ** (int(math.log(bsize, 2)))
    if vlen < 100*1000:
        bsize //= 8
    elif vlen < 1000*1000:
        bsize //= 4
    elif vlen < 10*1000*1000:
        bsize //= 2
    # Protection against too large atomsizes
    if bsize == 0:
        bsize = 1

    vars_ = {}
    # Get temporaries for vars
    # (also record the largest operand rank, used below to detect
    # reductions that drop dimensions)
    maxndims = 0
    for name in vars:
        var = vars[name]
        if hasattr(var, "__len__"):
            ndims = len(var.shape) + len(var.dtype.shape)
            if ndims > maxndims:
                maxndims = ndims
            if len(var) > bsize and hasattr(var, "_getrange"):
                # Preallocate a buffer that `_getrange` fills in place
                vars_[name] = np.empty(bsize, dtype=var.dtype)

    for i in xrange(0, vlen, bsize):
        # Get buffers for vars
        for name in vars:
            var = vars[name]
            if hasattr(var, "__len__") and len(var) > bsize:
                if hasattr(var, "_getrange"):
                    if i+bsize < vlen:
                        var._getrange(i, bsize, vars_[name])
                    else:
                        # Last (possibly short) block: plain slice instead
                        # of the fixed-size buffer
                        vars_[name] = var[i:]
                else:
                    vars_[name] = var[i:i+bsize]
            else:
                # Operand is not blocked: pass it whole on every iteration
                if hasattr(var, "__getitem__"):
                    vars_[name] = var[:]
                else:
                    vars_[name] = var

        # Perform the evaluation for this block
        if vm == "python":
            res_block = _eval(expression, vars_)
        else:
            try:
                res_block = bcolz.numexpr.evaluate(expression,
                                                   local_dict=vars_)
            except ValueError:
                # numexpr cannot handle this. Fall back to a pure "python" VM.
                return _eval_blocks(
                    expression, vars, vlen, typesize, "python",
                    out_flavor, **kwargs)

        if i == 0:
            # Detection of reduction operations
            scalar = False
            dim_reduction = False
            if len(res_block.shape) == 0:
                scalar = True
                result = res_block
                continue
            elif len(res_block.shape) < maxndims:
                dim_reduction = True
                result = res_block
                continue
            # Get a decent default for expectedlen
            if out_flavor == "carray":
                nrows = kwargs.pop('expectedlen', vlen)
                result = bcolz.carray(res_block, expectedlen=nrows, **kwargs)
            else:
                out_shape = list(res_block.shape)
                out_shape[0] = vlen
                result = np.empty(out_shape, dtype=res_block.dtype)
                result[:bsize] = res_block
        else:
            if scalar or dim_reduction:
                # Reductions are combined by adding the per-block results
                result += res_block
            elif out_flavor == "carray":
                result.append(res_block)
            else:
                result[i:i+bsize] = res_block

    if isinstance(result, bcolz.carray):
        result.flush()
    if scalar:
        return result[()]
    return result
from time import time

import numpy as np
from numpy.testing import assert_array_equal

import bcolz
from bcolz.py2help import xrange, izip

N = int(1e6)  # the number of elements in x
clevel = 2  # the compression level

print("Creating inputs with %d elements..." % N)

# Three integer ranges of length N, each shifted by one from the previous
x = xrange(N)  # not a true iterable, but can be converted
y = xrange(1, N + 1)
z = xrange(2, N + 2)

print("Starting benchmark now for creating arrays...")
# Create a ndarray
# x = (i for i in xrange(N))    # true iterable
# (time np.fromiter building a float64 array of N elements from x)
t0 = time()
out = np.fromiter(x, dtype='f8', count=N)
print("Time for array--> %.3f" % (time() - t0,))
print("out-->", len(out))

#bcolz.set_num_threads(bcolz.ncores//2)

# Create a carray
#x = (i for i in xrange(N))    # true iterable
# Benchmark for assessing the `fromiter()` speed. from time import time import numpy as np from numpy.testing import assert_array_equal import bcolz from bcolz.py2help import xrange, izip N = int(1e6) # the number of elements in x clevel = 2 # the compression level print("Creating inputs with %d elements..." % N) x = xrange(N) # not a true iterable, but can be converted y = xrange(1, N + 1) z = xrange(2, N + 2) print("Starting benchmark now for creating arrays...") # Create a ndarray # x = (i for i in xrange(N)) # true iterable t0 = time() out = np.fromiter(x, dtype='f8', count=N) print("Time for array--> %.3f" % (time() - t0, )) print("out-->", len(out)) #bcolz.set_num_threads(bcolz.ncores//2) # Create a carray #x = (i for i in xrange(N)) # true iterable