def test02(self):
    """Check that ctable.copy() applies a lower compression level."""
    nrows = 10 * 1000
    records = ((i, i**2.2) for i in xrange(nrows))
    ra = np.fromiter(records, dtype='i4,f8')
    source = ca.ctable(ra)
    duplicate = source.copy(cparams=ca.cparams(clevel=1))
    # The source table keeps the library default; only the copy changes.
    self.assert_(source.cparams.clevel == ca.cparams().clevel)
    self.assert_(duplicate.cparams.clevel == 1)
    # The copy made with clevel=1 is expected to take more compressed bytes.
    source_cbytes = source['f1'].cbytes
    duplicate_cbytes = duplicate['f1'].cbytes
    self.assert_(source_cbytes < duplicate_cbytes, "clevel not changed")
def test02(self):
    """Check that ctable.copy() applies a lower compression level."""
    nrows = 10 * 1000
    records = ((i, i**2.2) for i in xrange(nrows))
    ra = np.fromiter(records, dtype='i4,f8')
    source = ca.ctable(ra)
    duplicate = source.copy(cparams=ca.cparams(clevel=1))
    # The source table keeps the library default; only the copy changes.
    self.assert_(source.cparams.clevel == ca.cparams().clevel)
    self.assert_(duplicate.cparams.clevel == 1)
    # The copy made with clevel=1 is expected to take more compressed bytes.
    source_cbytes = source['f1'].cbytes
    duplicate_cbytes = duplicate['f1'].cbytes
    self.assert_(source_cbytes < duplicate_cbytes, "clevel not changed")
def test02(self):
    """Testing copy() with lesser compression"""
    # `num` has to be an integer: the float literal 1e4 is rejected by
    # current NumPy releases.
    a = np.linspace(-1., 1., 10000)
    b = ca.carray(a)
    c = b.copy(cparams=ca.cparams(clevel=1))
    #print "b.cbytes, c.cbytes:", b.cbytes, c.cbytes
    # The copy made with clevel=1 is expected to take more compressed bytes.
    self.assert_(b.cbytes < c.cbytes, "clevel not changed")
def test03(self):
    """Testing copy() with no shuffle"""
    # `num` has to be an integer: the float literal 1e4 is rejected by
    # current NumPy releases.
    a = np.linspace(-1., 1., 10000)
    b = ca.carray(a)
    c = b.copy(cparams=ca.cparams(shuffle=False))
    #print "b.cbytes, c.cbytes:", b.cbytes, c.cbytes
    # The copy made without shuffle is expected to take more compressed bytes.
    self.assert_(b.cbytes < c.cbytes, "shuffle not changed")
def __init__(self, columns=None, names=None, **kwargs):
    """Create a new ctable from `columns`, or open an existing one.

    When `columns` is not None it is handed, together with `names` and
    all keyword arguments, to `create_ctable()`.  When it is None,
    `open_ctable()` is called instead.

    Keyword arguments read here: `cparams` (default `ca.cparams()`),
    `rootdir` (default None) and `mode` (default "a").
    """
    # Important optional params
    self._cparams = kwargs.get("cparams", ca.cparams())
    self.rootdir = kwargs.get("rootdir", None)
    "The directory where this object is saved."
    self.mode = kwargs.get("mode", "a")
    "The mode in which the object is created/opened."

    # Setup the columns accessor
    # (must come after rootdir/mode are set, since it is built from them)
    self.cols = cols(self.rootdir, self.mode)
    "The ctable columns accessor."

    # The length counter of this array
    self.len = 0

    # Create a new ctable or open it from disk
    if columns is not None:
        self.create_ctable(columns, names, **kwargs)
        _new = True
    else:
        self.open_ctable()
        _new = False

    # Attach the attrs to this object
    # (`_new` tells the attrs object whether the table was just created)
    self.attrs = attrs.attrs(self.rootdir, self.mode, _new=_new)

    # Cache a structured array of len 1 for ctable[int] acceleration
    # NOTE(review): relies on `self.dtype` being available once the
    # columns have been created/opened above -- confirm.
    self._arr1 = np.empty(shape=(1,), dtype=self.dtype)
def test01b(self):
    """A NumPy column added via addcol() must report the table's clevel."""
    nrows = 10
    records = ((i, i * 2.) for i in xrange(nrows))
    ra = np.fromiter(records, dtype='i4,f8')
    table = ca.ctable(ra, cparams=ca.cparams(1))
    # Plain ndarray column: addcol() is given no explicit cparams.
    extra = np.arange(nrows, dtype='i8') * 3
    table.addcol(extra, 'f2')
    added_clevel = table['f2'].cparams.clevel
    self.assert_(added_clevel == 1, "Incorrect clevel")
def test03(self):
    """Testing copy() with no shuffle"""
    N = 10 * 1000
    ra = np.fromiter(((i, i**2.2) for i in xrange(N)), dtype='i4,f8')
    t = ca.ctable(ra)
    t2 = t.copy(cparams=ca.cparams(shuffle=False))
    #print "cbytes in f1, f2:", t['f1'].cbytes, t2['f1'].cbytes
    # The copy made without shuffle is expected to take more compressed
    # bytes.  The failure message names the parameter under test.
    self.assert_(t['f1'].cbytes < t2['f1'].cbytes, "shuffle not changed")
def test03(self):
    """Testing copy() with no shuffle"""
    N = 10 * 1000
    ra = np.fromiter(((i, i**2.2) for i in xrange(N)), dtype='i4,f8')
    t = ca.ctable(ra)
    t2 = t.copy(cparams=ca.cparams(shuffle=False))
    #print "cbytes in f1, f2:", t['f1'].cbytes, t2['f1'].cbytes
    # The copy made without shuffle is expected to take more compressed
    # bytes.  The failure message names the parameter under test.
    self.assert_(t['f1'].cbytes < t2['f1'].cbytes, "shuffle not changed")
def test01b(self):
    """A NumPy column added via addcol() must report the table's clevel."""
    nrows = 10
    records = ((i, i * 2.) for i in xrange(nrows))
    ra = np.fromiter(records, dtype='i4,f8')
    table = ca.ctable(ra, cparams=ca.cparams(1))
    # Plain ndarray column: addcol() is given no explicit cparams.
    extra = np.arange(nrows, dtype='i8') * 3
    table.addcol(extra, 'f2')
    added_clevel = table['f2'].cparams.clevel
    self.assert_(added_clevel == 1, "Incorrect clevel")
def compute_carray(sexpr, clevel, kernel):
    """Evaluate `sexpr` with ca.eval() and print the elapsed time.

    `clevel` is the compression level passed to ca.cparams() for the
    output; `kernel` is forwarded to ca.eval() and echoed in the report.
    """
    # Uncomment the next for disabling threading
    # ca.set_nthreads(1)
    # ca.blosc_set_nthreads(1)
    banner = "*** carray (using compression clevel = %d):" % clevel
    print (banner)
    # NOTE(review): `x` is never read here; presumably ca.eval() looks the
    # operand up by name in this scope -- confirm before renaming it.
    x = cx  # comment this for using numpy arrays in inputs
    started = time()
    cout = ca.eval(sexpr, kernel=kernel, cparams=ca.cparams(clevel))
    elapsed = time() - started
    print ("Time for ca.eval (%s) --> %.3f" % (kernel, elapsed))
def compute_carray(sexpr, clevel, kernel): # Uncomment the next for disabling threading # Maybe due to some contention between Numexpr and Blosc? # ca.set_nthreads(ca.ncores//2) print "*** carray (using compression clevel = %d):" % clevel if clevel > 0: x, y, z = cx, cy, cz t0 = time() cout = ca.eval(sexpr, kernel=kernel, cparams=ca.cparams(clevel)) print "Time for ca.eval (%s) --> %.3f" % (kernel, time()-t0,), print ", cratio (out): %.1f" % (cout.nbytes / float(cout.cbytes))
def compute_carray(sexpr, clevel, kernel):
    """Evaluate `sexpr` with ca.eval() and print the elapsed time.

    `clevel` is the compression level passed to ca.cparams() for the
    output; `kernel` is forwarded to ca.eval() and echoed in the report.
    """
    # Uncomment the next for disabling threading
    #ca.set_nthreads(1)
    #ca.blosc_set_nthreads(1)
    banner = "*** carray (using compression clevel = %d):" % clevel
    print(banner)
    # NOTE(review): `x` is never read here; presumably ca.eval() looks the
    # operand up by name in this scope -- confirm before renaming it.
    x = cx  # comment this for using numpy arrays in inputs
    started = time()
    cout = ca.eval(sexpr, kernel=kernel, cparams=ca.cparams(clevel))
    elapsed = time() - started
    print("Time for ca.eval (%s) --> %.3f" % (kernel, elapsed))
def test_ctable(clevel):
    """Build a table with ca.fromiter() and run the `squery` selection.

    Returns the rows yielded by ``tc.where(squery, 'f1,f3')`` packed into
    a NumPy array of dtype "f8,f8".  The enter()/after_create()/
    after_query() hooks are called around each phase.
    """
    enter()
    nrows = int(NR)
    random_rows = (mv+np.random.rand(NC)-mv for _ in xrange(nrows))
    tc = ca.fromiter(random_rows, dtype=dt, cparams=ca.cparams(clevel),
                     count=nrows)
    after_create()
    selected = (row for row in tc.where(squery, 'f1,f3'))
    out = np.fromiter(selected, dtype="f8,f8")
    after_query()
    return out
def test_ctable(clevel):
    """Build a table with ca.fromiter() and run the `squery` selection.

    Returns the rows yielded by ``tc.where(squery, 'f1,f3')`` packed into
    a NumPy array of dtype "f8,f8".  The enter()/after_create()/
    after_query() hooks are called around each phase.
    """
    enter()
    nrows = int(NR)
    random_rows = (mv + np.random.rand(NC) - mv for _ in xrange(nrows))
    tc = ca.fromiter(random_rows, dtype=dt, cparams=ca.cparams(clevel),
                     count=nrows)
    after_create()
    selected = (row for row in tc.where(squery, 'f1,f3'))
    out = np.fromiter(selected, dtype="f8,f8")
    after_query()
    return out
def test01a(self):
    """Assigning a scalar to a start:stop slice matches NumPy."""
    shape = (500, 200)
    expected = np.ones(shape, dtype="i4") * 3
    actual = ca.fill(shape, 3, dtype="i4", rootdir=self.rootdir,
                     cparams=ca.cparams())
    sl = slice(100, 400)
    expected[sl, :] = 0
    actual[sl] = 0
    if self.open:
        # Flush and reopen from `rootdir` so the reopened object is checked.
        actual.flush()
        actual = ca.open(rootdir=self.rootdir)
    assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
def compute_carray(sexpr, clevel, kernel): # Uncomment the next for disabling threading # Maybe due to some contention between Numexpr and Blosc? # ca.set_nthreads(ca.ncores//2) print "*** carray (using compression clevel = %d):" % clevel if clevel > 0: x, y, z = cx, cy, cz t0 = time() cout = ca.eval(sexpr, kernel=kernel, cparams=ca.cparams(clevel)) print "Time for ca.eval (%s) --> %.3f" % ( kernel, time() - t0, ), print ", cratio (out): %.1f" % (cout.nbytes / float(cout.cbytes))
def append(data, clevel):
    """Concatenate the arrays in `data` into a single carray.

    The first element seeds a carray created with compression level
    `clevel`; every remaining element is appended to it in order.
    """
    first, rest = data[0], data[1:]
    merged = ca.carray(first, cparams=ca.cparams(clevel))
    for piece in rest:
        merged.append(piece)
    return merged
def __init__(self, cols, names=None, **kwargs):
    """Build the table from a sequence of columns or a structured array.

    Parameters
    ----------
    cols : tuple or list of carray objects, tuple or list of ndarrays,
        or a one-dimensional structured ndarray.
    names : iterable of str, optional
        Column names.  Defaults to the field names of a structured
        array, or to "f0", "f1", ... for a sequence of columns.
    kwargs :
        `cparams` is stored as the table's compression parameters; all
        keyword arguments are forwarded to `ca.carray` whenever a column
        has to be converted.

    Raises
    ------
    ValueError
        If `cols` is of an unsupported kind, `names` cannot be turned
        into a list or does not match the number of columns, a column
        is of void dtype, or the columns differ in length.
    """
    self.names = []
    """The names of the columns (list)."""
    self.cols = {}
    """The ctable columns (dict)."""
    self.len = 0
    """The number of rows (int)."""

    # np.dtype always has a `names` attribute, so a hasattr() test cannot
    # tell a structured array from a plain one: check the value instead.
    is_ndarray = isinstance(cols, np.ndarray)
    fields = cols.dtype.names if is_ndarray else None
    if is_ndarray and fields is None:
        raise ValueError("`cols` input is not supported")

    # Get the names of the cols
    # For a structured array the columns are its fields, not its rows.
    ncols = len(fields) if is_ndarray else len(cols)
    if names is None:
        if is_ndarray:  # ratype case
            names = list(fields)
        else:
            names = ["f%d" % i for i in range(ncols)]
    else:
        if not isinstance(names, list):
            try:
                names = list(names)
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt
                # and SystemExit are never swallowed.
                raise ValueError("cannot convert `names` into a list")
        if len(names) != ncols:
            raise ValueError("`cols` and `names` must have the same length")

    # Check name validity (namedtuple rejects invalid field names)
    nt = namedtuple('_nt', names)
    names = list(nt._fields)
    self.names = names

    # Guess the kind of cols input
    calist, nalist, ratype = False, False, False
    if isinstance(cols, (tuple, list)):
        calist = all(type(v) is ca.carray for v in cols)
        nalist = all(type(v) is np.ndarray for v in cols)
    elif is_ndarray:
        ratype = True
        if len(cols.shape) != 1:
            raise ValueError("only unidimensional shapes supported")
    else:
        raise ValueError("`cols` input is not supported")
    if not (calist or nalist or ratype):
        raise ValueError("`cols` input is not supported")

    # The compression parameters
    self._cparams = kwargs.get('cparams', ca.cparams())

    # Populate the columns
    clen = -1
    for i, name in enumerate(names):
        if calist:
            column = cols[i]
        elif nalist:
            column = cols[i]
            if column.dtype == np.void:
                raise ValueError("`cols` elements cannot be of type void")
            column = ca.carray(column, **kwargs)
        else:
            # ratype: fetch by the original field name so that `names`
            # may rename the fields of the structured array.
            column = ca.carray(cols[fields[i]], **kwargs)
        self.cols[name] = column
        if clen >= 0 and clen != len(column):
            raise ValueError("all `cols` must have the same length")
        clen = len(column)
    # With no columns at all `clen` is still -1; never record a negative
    # number of rows.
    self.len += max(clen, 0)

    # Cache a structured array of len 1 for ctable[int] acceleration
    self._arr1 = np.empty(shape=(1, ), dtype=self.dtype)
# Benchmark: time the creation of a carray in memory and with a `rootdir`,
# compare with a plain NumPy copy, then time reopening from `rootdir`.

import numpy as np
import carray as ca
from time import time

N = 10 * 1000 * 1000  # number of elements in the test array
CLEVEL = 5            # compression level used for every carray below

a = np.linspace(0, 1, N)

# In-memory carray
t0 = time()
ac = ca.carray(a, cparams=ca.cparams(clevel=CLEVEL))
print "time creation (memory) ->", round(time()-t0, 3)
print "data (memory):", repr(ac)

# carray created with rootdir='myarray' (reported as "disk"); the flush()
# call is part of the timed section.
t0 = time()
b = ca.carray(a, cparams=ca.cparams(clevel=CLEVEL), rootdir='myarray')
b.flush()
print "time creation (disk) ->", round(time()-t0, 3)
#print "meta (disk):", b.read_meta()

# Baseline: plain NumPy copy of the same data
t0 = time()
an = np.array(a)
print "time creation (numpy) ->", round(time()-t0, 3)

# Reopen the array written above, giving only its rootdir
t0 = time()
c = ca.carray(rootdir='myarray')
print "time open (disk) ->", round(time()-t0, 3)
#print "meta (disk):", c.read_meta()
print "data (disk):", repr(c)

# Timer restarted for the next measurement, which is not visible here.
t0 = time()
# Benchmark for iterators import numpy as np import carray as ca from time import time N = 1e7 # the number of elements in x clevel = 5 # the compression level sexpr = "(x-1) < 10." # the expression to compute #sexpr = "((x-1) % 1000) == 0." # the expression to compute #sexpr = "(2*x**3+.3*y**2+z+1)<0" # the expression to compute cparams = ca.cparams(clevel) print "Creating inputs..." x = np.arange(N) cx = ca.carray(x, cparams=cparams) if 'y' not in sexpr: ct = ca.ctable((cx, ), names=['x']) else: y = np.arange(N) z = np.arange(N) cy = ca.carray(y, cparams=cparams) cz = ca.carray(z, cparams=cparams) ct = ca.ctable((cx, cy, cz), names=['x', 'y', 'z']) print "Evaluating...", sexpr t0 = time() cbout = ct.eval(sexpr) print "Time for evaluation--> %.3f" % (time() - t0, )
def __init__(self, cols, names=None, **kwargs):
    """Build the table from a sequence of columns or a structured array.

    Parameters
    ----------
    cols : tuple or list of carray objects, tuple or list of ndarrays,
        or a one-dimensional structured ndarray.
    names : iterable of str, optional
        Column names.  Defaults to the field names of a structured
        array, or to "f0", "f1", ... for a sequence of columns.
    kwargs :
        `cparams` is stored as the table's compression parameters; all
        keyword arguments are forwarded to `ca.carray` whenever a column
        has to be converted.

    Raises
    ------
    ValueError
        If `cols` is of an unsupported kind, `names` cannot be turned
        into a list or does not match the number of columns, a column
        is of void dtype, or the columns differ in length.
    """
    self.names = []
    """The names of the columns (list)."""
    self.cols = {}
    """The ctable columns (dict)."""
    self.len = 0
    """The number of rows (int)."""

    # np.dtype always has a `names` attribute, so a hasattr() test cannot
    # tell a structured array from a plain one: check the value instead.
    is_ndarray = isinstance(cols, np.ndarray)
    fields = cols.dtype.names if is_ndarray else None
    if is_ndarray and fields is None:
        raise ValueError("`cols` input is not supported")

    # Get the names of the cols
    # For a structured array the columns are its fields, not its rows.
    ncols = len(fields) if is_ndarray else len(cols)
    if names is None:
        if is_ndarray:  # ratype case
            names = list(fields)
        else:
            names = ["f%d" % i for i in range(ncols)]
    else:
        if not isinstance(names, list):
            try:
                names = list(names)
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt
                # and SystemExit are never swallowed.
                raise ValueError("cannot convert `names` into a list")
        if len(names) != ncols:
            raise ValueError("`cols` and `names` must have the same length")

    # Check name validity (namedtuple rejects invalid field names)
    nt = namedtuple('_nt', names)
    names = list(nt._fields)
    self.names = names

    # Guess the kind of cols input
    calist, nalist, ratype = False, False, False
    if isinstance(cols, (tuple, list)):
        calist = all(type(v) is ca.carray for v in cols)
        nalist = all(type(v) is np.ndarray for v in cols)
    elif is_ndarray:
        ratype = True
        if len(cols.shape) != 1:
            raise ValueError("only unidimensional shapes supported")
    else:
        raise ValueError("`cols` input is not supported")
    if not (calist or nalist or ratype):
        raise ValueError("`cols` input is not supported")

    # The compression parameters
    self._cparams = kwargs.get('cparams', ca.cparams())

    # Populate the columns
    clen = -1
    for i, name in enumerate(names):
        if calist:
            column = cols[i]
        elif nalist:
            column = cols[i]
            if column.dtype == np.void:
                raise ValueError("`cols` elements cannot be of type void")
            column = ca.carray(column, **kwargs)
        else:
            # ratype: fetch by the original field name so that `names`
            # may rename the fields of the structured array.
            column = ca.carray(cols[fields[i]], **kwargs)
        self.cols[name] = column
        if clen >= 0 and clen != len(column):
            raise ValueError("all `cols` must have the same length")
        clen = len(column)
    # With no columns at all `clen` is still -1; never record a negative
    # number of rows.
    self.len += max(clen, 0)

    # Cache a structured array of len 1 for ctable[int] acceleration
    self._arr1 = np.empty(shape=(1,), dtype=self.dtype)
# Benchmark for getitem import numpy as np import carray as ca from time import time N = 1e7 # the number of elements in x M = 100000 # the elements to get clevel = 1 # the compression level print "Creating inputs with %d elements..." % N cparams = ca.cparams(clevel) #x = np.arange(N) x = np.zeros(N, dtype="f8") y = x.copy() z = x.copy() cx = ca.carray(x, cparams=cparams) cy = cx.copy() cz = cx.copy() ct = ca.ctable((cx, cy, cz), names=['x','y','z']) t = ct[:] print "Starting benchmark now for getting %d elements..." % M # Retrieve from a ndarray t0 = time() vals = [x[i] for i in xrange(0, M, 3)] print "Time for array--> %.3f" % (time()-t0,) print "vals-->", len(vals)
import numpy as np import carray as ca from time import time N = 1e8 #a = np.arange(N, dtype='f8') a = np.random.randint(0, 10, N).astype('bool') t0 = time() sa = a.sum() print "Time sum() numpy --> %.3f" % (time() - t0) t0 = time() ac = ca.carray(a, cparams=ca.cparams(9)) print "Time carray conv --> %.3f" % (time() - t0) print "ac-->", ` ac ` t0 = time() sac = ac.sum() #sac = ac.sum(dtype=np.dtype('i8')) print "Time sum() carray --> %.3f" % (time() - t0) # t0 = time() # sac = sum(i for i in ac) # print "Time sum() carray (iter) --> %.3f" % (time()-t0) print "sa, sac-->", sa, sac, type(sa), type(sac) assert (sa == sac)
def test04(self):
    """A long slice of a chunk must equal the same slice of the source."""
    source = np.arange(1e4)
    packed = chunk(source, atom=source.dtype, cparams=ca.cparams())
    expected = source[1:8000]
    actual = packed[1:8000]
    assert_array_equal(expected, actual, "Arrays are not equal")
def test03(self):
    """A stepped slice of a chunk must equal the same slice of the source."""
    source = np.arange(1e3)
    packed = chunk(source, atom=source.dtype, cparams=ca.cparams())
    expected = source[1:8:3]
    actual = packed[1:8:3]
    assert_array_equal(expected, actual, "Arrays are not equal")
# NOTE(review): fragment of a larger benchmark script.  `x`, `y`, `N`,
# `clevel`, `it` and `assert_array_equal` are used below but defined
# before this point.
z = xrange(2, N + 2)

print "Starting benchmark now for creating arrays..."

# `x` is consumed several times below, so it must be re-iterable; the
# commented-out generator variants would be exhausted after one pass.

# Create a ndarray
#x = (i for i in xrange(N))    # true iterable
t0 = time()
out = np.fromiter(x, dtype='f8', count=N)
print "Time for array--> %.3f" % (time() - t0, )
print "out-->", len(out)

#ca.set_num_threads(ca.ncores//2)

# Create a carray
#x = (i for i in xrange(N))    # true iterable
t0 = time()
cout = ca.fromiter(x, dtype='f8', count=N, cparams=ca.cparams(clevel))
print "Time for carray--> %.3f" % (time() - t0, )
print "cout-->", len(cout)
# The carray must hold exactly what NumPy produced from the same input.
assert_array_equal(out, cout, "Arrays are not equal")

# Create a carray (with unknown size)
#x = (i for i in xrange(N))    # true iterable
t0 = time()
cout = ca.fromiter(x, dtype='f8', count=-1, cparams=ca.cparams(clevel))
print "Time for carray (count=-1)--> %.3f" % (time() - t0, )
print "cout-->", len(cout)
assert_array_equal(out, cout, "Arrays are not equal")

# Retrieve from a structured ndarray
# `gen` yields one (x, y, z) tuple per row; the timer is restarted for a
# measurement that is not visible here.
gen = ((i, j, k) for i, j, k in it.izip(x, y, z))
t0 = time()
import numpy as np import carray as ca from time import time N = 1e8 #a = np.arange(N, dtype='f8') a = np.random.randint(0,10,N).astype('bool') t0 = time() sa = a.sum() print "Time sum() numpy --> %.3f" % (time()-t0) t0 = time() ac = ca.carray(a, cparams=ca.cparams(9)) print "Time carray conv --> %.3f" % (time()-t0) print "ac-->", `ac` t0 = time() sac = ac.sum() #sac = ac.sum(dtype=np.dtype('i8')) print "Time sum() carray --> %.3f" % (time()-t0) # t0 = time() # sac = sum(i for i in ac) # print "Time sum() carray (iter) --> %.3f" % (time()-t0) print "sa, sac-->", sa, sac, type(sa), type(sac) assert(sa == sac)
#print "cout-->", repr(cout)

if __name__ == "__main__":
    # Benchmark driver: time numexpr on `sexpr`, then optionally profile
    # compute_carray() with cProfile.

    N = 1e8        # the number of elements in x
    clevel = 9     # the compression level
    # The second assignment wins; the first is kept as a quick alternative.
    sexpr = "(x+1)<0"
    sexpr = "(((.25*x + .75)*x - 1.5)*x - 2)<0"
    #sexpr = "(((.25*x + .75)*x - 1.5)*x - 2)"
    doprofile = 0

    print("Creating inputs...")
    x = np.arange(N)
    #x = np.linspace(0,100,N)
    cx = ca.carray(x, cparams=ca.cparams(clevel))

    print("Evaluating '%s' with 10^%d points" % (sexpr, int(math.log10(N))))

    t0 = time()
    cout = ne.evaluate(sexpr)
    # Call form with a single argument, matching the other prints in this
    # block; the output is the same as the statement form.
    print("Time for numexpr --> %.3f" % (time() - t0, ))

    if doprofile:
        import pstats
        import cProfile as prof
        # Profile results are written to 'eval.prof' and loaded back below.
        prof.run(
            'compute_carray(sexpr, clevel=clevel, kernel="numexpr")',
            #prof.run('compute_carray(sexpr, clevel=clevel, kernel="python")',
            'eval.prof')
        stats = pstats.Stats('eval.prof')
# NOTE(review): fragment of a larger benchmark script.  `x`, `y`, `N`,
# `clevel`, `it` and `assert_array_equal` are used below but defined
# before this point.
z = xrange(2,N+2)

print "Starting benchmark now for creating arrays..."

# `x` is consumed several times below, so it must be re-iterable; the
# commented-out generator variants would be exhausted after one pass.

# Create a ndarray
#x = (i for i in xrange(N))    # true iterable
t0 = time()
out = np.fromiter(x, dtype='f8', count=N)
print "Time for array--> %.3f" % (time()-t0,)
print "out-->", len(out)

#ca.set_num_threads(ca.ncores//2)

# Create a carray
#x = (i for i in xrange(N))    # true iterable
t0 = time()
cout = ca.fromiter(x, dtype='f8', count=N, cparams=ca.cparams(clevel))
print "Time for carray--> %.3f" % (time()-t0,)
print "cout-->", len(cout)
# The carray must hold exactly what NumPy produced from the same input.
assert_array_equal(out, cout, "Arrays are not equal")

# Create a carray (with unknown size)
#x = (i for i in xrange(N))    # true iterable
t0 = time()
cout = ca.fromiter(x, dtype='f8', count=-1, cparams=ca.cparams(clevel))
print "Time for carray (count=-1)--> %.3f" % (time()-t0,)
print "cout-->", len(cout)
assert_array_equal(out, cout, "Arrays are not equal")

# Retrieve from a structured ndarray
# `gen` yields one (x, y, z) tuple per row; the timer is restarted for a
# measurement that is not visible here.
gen = ((i,j,k) for i,j,k in it.izip(x,y,z))
t0 = time()
# print "cout-->", repr(cout) if __name__ == "__main__": N = 1e8 # the number of elements in x clevel = 9 # the compression level sexpr = "(x+1)<0" sexpr = "(((.25*x + .75)*x - 1.5)*x - 2)<0" # sexpr = "(((.25*x + .75)*x - 1.5)*x - 2)" doprofile = 0 print ("Creating inputs...") x = np.arange(N) # x = np.linspace(0,100,N) cx = ca.carray(x, cparams=ca.cparams(clevel)) print ("Evaluating '%s' with 10^%d points" % (sexpr, int(math.log10(N)))) t0 = time() cout = ne.evaluate(sexpr) print "Time for numexpr --> %.3f" % (time() - t0,) if doprofile: import pstats import cProfile as prof prof.run( 'compute_carray(sexpr, clevel=clevel, kernel="numexpr")', # prof.run('compute_carray(sexpr, clevel=clevel, kernel="python")', "eval.prof",
def test01(self):
    """Scalar indexing of a chunk must return the source value."""
    source = np.arange(1e3)
    packed = chunk(source, atom=source.dtype, cparams=ca.cparams())
    expected = source[1]
    actual = packed[1]
    self.assert_(expected == actual, "Values in key 1 are not equal")