def __init__(self, data=None, dshape=None, params=None):
    """Set ``self.ca`` to a ctable wrapping, or built from, the arguments.

    At least one of `data`, `dshape` or a `params` object holding a truthy
    ``'storage'`` entry has to be supplied.

    Parameters
    ----------
    data : ctable or ctable-constructor input, optional
        An existing ctable is stored as-is and the other arguments are
        ignored.  Anything else is handed to the ``ctable`` constructor.
    dshape : optional
        When truthy it is converted with ``to_numpy`` and the resulting
        dtype is used for the container.  If `data` is missing or empty,
        an empty array of that dtype is used as the initial content.
    params : optional
        General Blaze params object.  When truthy, ``to_cparams`` extracts
        the compression parameters and the root directory from it.

    Raises
    ------
    AssertionError
        If none of the three arguments provides anything to build from.
    """
    # Need at least one of the three.  `params` is compared with None first
    # so that a call without arguments fails this check cleanly instead of
    # raising AttributeError on ``None.get``.
    assert (data is not None) or (dshape is not None) or \
        (params is not None and params.get('storage')), \
        "need at least one of data, dshape or params['storage']"

    # An existing ctable is simply wrapped.
    if isinstance(data, ctable):
        self.ca = data
        return

    # Extract the relevant carray parameters from the more
    # general Blaze params object.
    if params:
        cparams, rootdir, _ = to_cparams(params)
    else:
        rootdir, cparams = None, None

    if dshape:
        # Only the dtype half of the converted datashape is needed here.
        _, dtype = to_numpy(dshape)
        if data is None or len(data) == 0:
            # Nothing to store yet: start from an empty, correctly typed
            # array (also covers the "dshape only" call, where `data` is
            # None and ``len(data)`` would raise TypeError).
            data = np.empty(0, dtype=dtype)
            self.ca = ctable(data, rootdir=rootdir, cparams=cparams)
        else:
            # Forward cparams here as well so that the compression
            # settings taken from `params` are honoured on every path.
            self.ca = ctable(data, dtype=dtype, rootdir=rootdir,
                             cparams=cparams)
    else:
        self.ca = ctable(data, rootdir=rootdir, cparams=cparams)
def __init__(self, data=None, dshape=None, params=None):
    """Set ``self.ca`` to a ctable wrapping, or built from, the arguments.

    At least one of `data`, `dshape` or a `params` object holding a truthy
    ``'storage'`` entry has to be supplied.

    Parameters
    ----------
    data : ctable or ctable-constructor input, optional
        An existing ctable is stored as-is and the other arguments are
        ignored.  Anything else is handed to the ``ctable`` constructor.
    dshape : optional
        When truthy it is converted with ``to_numpy`` and the resulting
        dtype is used for the container.  If `data` is missing or empty,
        an empty array of that dtype is used as the initial content.
    params : optional
        General Blaze params object.  When truthy, ``to_cparams`` extracts
        the compression parameters and the root directory from it.

    Raises
    ------
    AssertionError
        If none of the three arguments provides anything to build from.
    """
    # Need at least one of the three.  `params` is compared with None first
    # so that a call without arguments fails this check cleanly instead of
    # raising AttributeError on ``None.get``.
    assert (data is not None) or (dshape is not None) or \
        (params is not None and params.get('storage')), \
        "need at least one of data, dshape or params['storage']"

    # An existing ctable is simply wrapped.
    if isinstance(data, ctable):
        self.ca = data
        return

    # Extract the relevant carray parameters from the more
    # general Blaze params object.
    if params:
        cparams, rootdir, _ = to_cparams(params)
    else:
        rootdir, cparams = None, None

    if dshape:
        # Only the dtype half of the converted datashape is needed here.
        _, dtype = to_numpy(dshape)
        if data is None or len(data) == 0:
            # Nothing to store yet: start from an empty, correctly typed
            # array (also covers the "dshape only" call, where `data` is
            # None and ``len(data)`` would raise TypeError).
            data = np.empty(0, dtype=dtype)
            self.ca = ctable(data, rootdir=rootdir, cparams=cparams)
        else:
            # Forward cparams here as well so that the compression
            # settings taken from `params` are honoured on every path.
            self.ca = ctable(data, dtype=dtype, rootdir=rootdir,
                             cparams=cparams)
    else:
        self.ca = ctable(data, rootdir=rootdir, cparams=cparams)
def open(rootdir, mode='a'):
    """
    open(rootdir, mode='a')

    Open a carray/ctable that lives on disk.

    Parameters
    ----------
    rootdir : pathname (string)
        Directory that hosts the carray/ctable object.
    mode : the open mode (string)
        How the object is opened.  Supported values:

          * 'r' for read-only
          * 'w' for emptying the previous underlying data
          * 'a' for allowing read/write on top of existing data

    Returns
    -------
    out : a carray/ctable object, or None when `rootdir` can be opened
        as neither of them (i.e. both attempts raise IOError).

    """
    # A carray is the first candidate ...
    try:
        return carray(rootdir=rootdir, mode=mode)
    except IOError:
        pass

    # ... and a ctable the fallback.
    try:
        return ctable(rootdir=rootdir, mode=mode)
    except IOError:
        # Neither kind of object is stored under `rootdir`.
        return None
def walk(dir, classname=None, mode='a'):
    """walk(dir, classname=None, mode='a')

    Recursively iterate over carray/ctable objects hanging from `dir`.

    Every sub-directory of `dir` is first tried as a carray and then as a
    ctable.  Directories that open as neither are treated as plain
    directories and are descended into after all the objects found
    directly in `dir` have been yielded.  Directories that do open as an
    object are not descended into.

    Parameters
    ----------
    dir : string
        The directory from which the listing starts.
    classname : string
        If specified, only object of this class are returned.  The values
        supported are 'carray' and 'ctable'.
    mode : string
        The mode in which the object should be opened.

    Returns
    -------
    out : iterator
        Iterator over the objects found.

    """
    # First, iterate over the carray/ctable objects in the current dir
    dirs = []
    for node in glob.glob(os.path.join(dir, '*')):
        if not os.path.isdir(node):
            continue
        # Opening is best-effort, so any ordinary error means "not this
        # kind of object".  `Exception` (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate to the caller.
        try:
            obj = carray(rootdir=node, mode=mode)
        except Exception:
            try:
                obj = ctable(rootdir=node, mode=mode)
            except Exception:
                # A plain directory: remember it for the recursion below.
                obj = None
                dirs.append(node)
        # Compare against None explicitly: a truthiness test would drop a
        # successfully opened object that happens to evaluate as false
        # (e.g. a container reporting a length of 0).
        if obj is None:
            continue
        if not classname or obj.__class__.__name__ == classname:
            yield obj

    # Then recurse into the true directories
    for dir_ in dirs:
        for node in walk(dir_, classname, mode):
            yield node
def fromiter(iterable, dtype, count, **kwargs):
    """
    fromiter(iterable, dtype, count, **kwargs)

    Build a carray/ctable out of the items produced by `iterable`.

    Parameters
    ----------
    iterable : iterable object
        Source of the items to be stored.
    dtype : numpy.dtype instance
        Type of the resulting object.  A dtype of kind "V" produces a
        ctable, any other kind a carray.
    count : int
        How many items to take from `iterable`.  -1 means "until it is
        exhausted" (discouraged, see the notes below).
    kwargs : list of parameters or dictionary
        Any parameter supported by the carray/ctable constructors.  An
        "expectedlen" entry, when present, replaces the length estimated
        here.

    Returns
    -------
    out : a carray/ctable object

    Notes
    -----
    Passing an explicit `count` is both faster and lighter on memory.
    With ``count=-1`` and an iterable that offers no ``__length_hint__``,
    the iterable is duplicated with ``itertools.tee`` and the duplicate is
    replayed to read the final, partial block, which is slow and keeps
    the already consumed items buffered.

    """
    from ctable import ctable

    # Make sure we are dealing with an iterator proper
    if not hasattr(iterable, "next"):
        iterable = iter(iterable)

    # Work out how many items are to be expected
    expected = count
    if count == -1:
        if not hasattr(iterable, "__length_hint__"):
            # Nothing to go by: read "forever" and keep a second copy of
            # the iterable around, to be used once the first one runs dry
            # (np.fromiter signals that with a ValueError).
            count = sys.maxint
            iterable, spare = it.tee(iterable)
            expected = 1000000    # 1 million elements
        else:
            # The iterable can tell its own length
            count = iterable.__length_hint__()
            expected = count

    # Create the (still empty) container
    expectedlen = kwargs.pop("expectedlen", expected)
    dtype = np.dtype(dtype)
    if dtype.kind != "V":
        # A carray
        obj = carray(np.array([], dtype=dtype), expectedlen=expectedlen,
                     **kwargs)
        chunklen = obj.chunklen
    else:
        # A ctable: use the average chunk length of its columns
        obj = ctable(np.array([], dtype=dtype), expectedlen=expectedlen,
                     **kwargs)
        per_column = [obj.cols[name].chunklen for name in obj.names]
        chunklen = sum(per_column) // len(obj.names)

    # Fill the container block by block
    consumed = 0
    while consumed < count:
        # Never ask for more than what is left to reach `count`
        wanted = min(chunklen, count - consumed)
        if count == sys.maxint:
            try:
                block = np.fromiter(iterable, dtype=dtype, count=wanted)
            except ValueError:
                # The first iterable ran out before `wanted` items: skip
                # what was already stored and drain the second one.
                tail = it.islice(spare, consumed, None, 1)
                block = np.fromiter(tail, dtype=dtype, count=-1)
        else:
            block = np.fromiter(iterable, dtype=dtype, count=wanted)
        obj.append(block)
        got = len(block)
        consumed += got
        # A short block means the iterable is exhausted
        if got < chunklen:
            break
    obj.flush()
    return obj
# Example: build an on-disk ctable (only if it does not exist yet) and time
# a mean computed over one of its columns through blaze.
import os
from time import time

import blaze
from blaze.carray import carray
from blaze.carray.ctable import ctable
import numpy as np

# Root directory of the on-disk ctable used by this example.  It is the
# single source of truth for the existence check, the table creation and
# the URI opened below.
STORAGE = 'example1'

#------------------------------------------------------------------------
if not os.path.exists(STORAGE):
    print('Creating tables')
    N = 100000
    a = carray(np.arange(N, dtype='i4'))
    b = carray(np.arange(N, dtype='f8') + 1)
    t = ctable((a, b), ('f0', 'f1'), rootdir=STORAGE, mode='w')
    t.flush()
#------------------------------------------------------------------------

print('-------------------')

t = blaze.open('ctable://' + STORAGE)

# Using chunked blaze array we can optimize for IO doing the sum
# operations chunkwise from disk.
t0 = time()
print(blaze.mean(t, 'f0'))
# Single formatted argument: prints the same text as the two-item print
# statement while remaining valid syntax for both Python 2 and Python 3.
print("Chunked mean %s" % round(time() - t0, 6))