def open(rootdir, mode='a'):
    """
    open(rootdir, mode='a')

    Open a disk-based carray/ctable.

    Parameters
    ----------
    rootdir : pathname (string)
        The directory hosting the carray/ctable object.
    mode : the open mode (string)
        Specifies the mode in which the object is opened.  The supported
        values are:

          * 'r' for read-only
          * 'w' for emptying the previous underlying data
          * 'a' for allowing read/write on top of existing data

    Returns
    -------
    out : a carray/ctable object or None (if no objects are found)

    """
    # A carray is tried first; an IOError means `rootdir` does not host one.
    try:
        return carray(rootdir=rootdir, mode=mode)
    except IOError:
        pass

    # Not a carray, so give the ctable constructor a chance.
    try:
        return ctable(rootdir=rootdir, mode=mode)
    except IOError:
        # Neither kind of container lives in `rootdir`.
        return None
def fill(shape, dflt=None, dtype=float, **kwargs):
    """
    fill(shape, dflt=None, dtype=float, **kwargs)

    Return a new carray object of given shape and type, filled with `dflt`.

    Parameters
    ----------
    shape : int or sequence of ints
        Shape of the new array, e.g., ``(2,3)``.  A scalar is interpreted
        as the length of a unidimensional array.
    dflt : Python or NumPy scalar
        The value to be used during the filling process.  If None, values
        are filled with zeros.  Also, the resulting carray will have this
        value as its `dflt` value.
    dtype : data-type, optional
        The desired data-type for the array, e.g., `numpy.int8`.  Default is
        `float` (i.e. `numpy.float64`).
    kwargs : list of parameters or dictionary
        Any parameter supported by the carray constructor.

    Returns
    -------
    out : carray
        Array filled with `dflt` values with the given shape and dtype.

    Raises
    ------
    ValueError
        If `dtype` is a structured (record) type, as ctables are not
        supported here.

    See Also
    --------
    ones, zeros

    """
    dtype = np.dtype(dtype)

    # Normalize `shape` to a tuple.  Anything that cannot be iterated
    # (Python ints/floats, NumPy scalars) is taken as a plain length.
    try:
        shape = tuple(shape)
    except TypeError:
        shape = (int(shape),)

    if len(shape) > 1:
        # Multidimensional shape.
        # The atom will have shape[1:] dims (+ the dtype dims).
        dtype = np.dtype((dtype.base, shape[1:] + dtype.shape))
    length = shape[0]

    # Create the container
    expectedlen = kwargs.pop("expectedlen", length)
    if dtype.kind == "V" and dtype.shape == ():
        raise ValueError("fill does not support ctables objects")
    obj = carray([], dtype=dtype, dflt=dflt, expectedlen=expectedlen,
                 **kwargs)

    # Then fill it
    # We need an array for the defaults so as to keep the atom info
    dflt = np.array(obj.dflt, dtype=dtype)
    # Making strides=(0,) below is a trick to create the array fast and
    # without memory consumption: every element aliases the single `dflt`.
    chunk = np.ndarray(length, dtype=dtype, buffer=dflt, strides=(0,))
    obj.append(chunk)
    obj.flush()
    return obj
def read_meta_and_open(self):
    """Read the meta-information and initialize structures.

    Loads the JSON document stored as ``ROOTDIRS`` inside ``self.rootdir``,
    sets ``self.names`` from its ``'names'`` entry and opens one carray per
    ``'dirs'`` entry (using ``self.mode``), storing them in ``self._cols``.
    """
    # `io.open` is used instead of a bare `open()` so that the read can
    # never be captured by the carray/ctable `open()` helper, which shadows
    # the builtin wherever it is in scope.
    import io

    # Get the directories of the columns
    rootsfile = os.path.join(self.rootdir, ROOTDIRS)
    with io.open(rootsfile, 'rb') as rfile:
        data = json.loads(rfile.read())

    # JSON may hand back unicode objects, so coerce the names to `str`
    self.names = [str(name) for name in data['names']]

    # Initialize the cols by instantiating the carrays
    for name, dir_ in data['dirs'].items():
        self._cols[str(name)] = carray(rootdir=dir_, mode=self.mode)
def walk(dir, classname=None, mode='a'):
    """walk(dir, classname=None, mode='a')

    Recursively iterate over carray/ctable objects hanging from `dir`.

    Parameters
    ----------
    dir : string
        The directory from which the listing starts.
    classname : string
        If specified, only object of this class are returned.  The values
        supported are 'carray' and 'ctable'.
    mode : string
        The mode in which the object should be opened.

    Returns
    -------
    out : iterator
        Iterator over the objects found.

    Notes
    -----
    A directory that can be opened as a carray or ctable is yielded and not
    descended into; only directories that fail both attempts are recursed.

    """
    # First, iterate over the carray/ctable objects in the current dir
    plain_dirs = []
    for node in glob.glob(os.path.join(dir, '*')):
        if not os.path.isdir(node):
            continue
        obj = None
        # `except Exception` keeps the best-effort probing while letting
        # KeyboardInterrupt/SystemExit propagate.
        try:
            obj = carray(rootdir=node, mode=mode)
        except Exception:
            try:
                obj = ctable(rootdir=node, mode=mode)
            except Exception:
                # Neither a carray nor a ctable: a true directory
                plain_dirs.append(node)
        # Test identity, not truthiness, so that a container that happens
        # to evaluate as false (e.g. an empty one) is not dropped.
        if obj is None:
            continue
        if not classname or obj.__class__.__name__ == classname:
            yield obj

    # Then recurse into the true directories
    for dir_ in plain_dirs:
        for node in walk(dir_, classname, mode):
            yield node
def fromiter(iterable, dtype, count, **kwargs):
    """
    fromiter(iterable, dtype, count, **kwargs)

    Create a carray/ctable from an `iterable` object.

    Parameters
    ----------
    iterable : iterable object
        An iterable object providing data for the carray.
    dtype : numpy.dtype instance
        Specifies the type of the outcome object.
    count : int
        The number of items to read from iterable. If set to -1, means that
        the iterable will be used until exhaustion.
    kwargs : list of parameters or dictionary
        Any parameter supported by the carray/ctable constructors.

    Returns
    -------
    out : a carray/ctable object

    Raises
    ------
    ValueError
        Raised by ``numpy.fromiter`` when an explicit `count` is given and
        `iterable` provides fewer items than that.

    Notes
    -----
    Please specify `count` whenever it is known: it is used as the default
    `expectedlen` of the container and every chunk is then read with an
    exact item count.  With ``count=-1`` the iterable is consumed exactly
    once, one chunk at a time, until it is exhausted; a length hint exposed
    by the iterator, if any, is only used as the default `expectedlen`.

    """
    # NOTE(review): imported at call time, presumably to avoid a circular
    # import between modules -- confirm before moving it to module level.
    from ctable import ctable

    # iter() returns iterators unchanged, so this is safe for any input and
    # guarantees that successive chunk reads resume where the last stopped.
    iterable = iter(iterable)

    # Try to guess the final length
    until_exhausted = (count == -1)
    expected = count
    if until_exhausted:
        expected = 1000 * 1000   # 1 million elements when there is no guess
        if hasattr(iterable, "__length_hint__"):
            hint = iterable.__length_hint__()
            # The hint is an estimate, never a contract: it only sizes the
            # container and must not limit how many items are read.
            if hint is not NotImplemented:
                expected = hint

    # First, create the container
    expectedlen = kwargs.pop("expectedlen", expected)
    dtype = np.dtype(dtype)
    if dtype.kind == "V":
        # A ctable
        obj = ctable(np.array([], dtype=dtype),
                     expectedlen=expectedlen, **kwargs)
        chunklen = sum(obj.cols[name].chunklen
                       for name in obj.names) // len(obj.names)
    else:
        # A carray
        obj = carray(np.array([], dtype=dtype),
                     expectedlen=expectedlen, **kwargs)
        chunklen = obj.chunklen

    # Then fill it
    if until_exhausted:
        while True:
            # islice() caps the read at one chunk without consuming any
            # extra item, so no second copy of the iterable is needed.
            chunk = np.fromiter(it.islice(iterable, chunklen), dtype=dtype)
            if len(chunk):
                obj.append(chunk)
            # A short (or empty) chunk means the iterable is exhausted
            if len(chunk) < chunklen or not len(chunk):
                break
    else:
        nread = 0
        while nread < count:
            blen = min(chunklen, count - nread)
            chunk = np.fromiter(iterable, dtype=dtype, count=blen)
            obj.append(chunk)
            nread += len(chunk)
            # Check the end of the iterable
            if len(chunk) < chunklen:
                break
    obj.flush()
    return obj
def addcol(self, newcol, name=None, pos=None, **kwargs):
    """
    addcol(newcol, name=None, pos=None, **kwargs)

    Add a new `newcol` object as column.

    Parameters
    ----------
    newcol : carray, ndarray, list or tuple
        If a carray is passed, no conversion will be carried out.
        If conversion to a carray has to be done, `kwargs` will
        apply.
    name : string, optional
        The name for the new column.  If not passed, it will
        receive an automatic name.
    pos : int, optional
        The column position.  If not passed, it will be appended
        at the end.
    kwargs : list of parameters or dictionary
        Any parameter supported by the carray constructor.

    Raises
    ------
    ValueError
        If `pos` is not an int or is out of range, if `name` is not a
        string or is already in use, if `newcol` does not have the same
        length as the ctable, or if the type of `newcol` is not supported.

    Notes
    -----
    You should not specify both `name` and `pos` arguments,
    unless they are compatible.

    See Also
    --------
    delcol

    """
    # Check params
    if pos is None:
        pos = len(self.names)
    else:
        # Every non-int is rejected, including falsy ones such as 0.0 or
        # '' (and bools, which are ints only by inheritance).
        if not isinstance(pos, int) or isinstance(pos, bool):
            raise ValueError("`pos` must be an int")
        if pos < 0 or pos > len(self.names):
            raise ValueError("`pos` must be >= 0 and <= len(self.cols)")
    if name is None:
        name = "f%d" % pos
    elif not isinstance(name, str):
        raise ValueError("`name` must be a string")
    if name in self.names:
        raise ValueError("'%s' column already exists" % name)
    if len(newcol) != self.len:
        raise ValueError("`newcol` must have the same length than ctable")

    if isinstance(newcol, (np.ndarray, list, tuple)):
        # Converted columns inherit the compression params of the ctable
        # unless the caller asked for something else.
        if 'cparams' not in kwargs:
            kwargs['cparams'] = self.cparams
        newcol = carray(newcol, **kwargs)
    elif not isinstance(newcol, carray):
        raise ValueError("""`newcol` type not supported""")

    # Insert the column
    self.cols.insert(name, pos, newcol)
    # Update _arr1: rebuilt from self.dtype after the column set changed
    self._arr1 = np.empty(shape=(1,), dtype=self.dtype)
def create_ctable(self, columns, names, **kwargs):
    """Create a ctable anew.

    Parameters
    ----------
    columns : tuple or list of column objects, or structured ndarray
        The data for the columns.  Elements of a tuple/list may be carrays,
        ndarrays or anything the carray constructor accepts.
    names : list or tuple of strings, or None
        The column names.  If None, they are taken from the fields of a
        structured array, or generated as 'f0', 'f1', ... otherwise.
    kwargs : list of parameters or dictionary
        Passed to the carray constructor (or to `copy` for carray columns
        when the ctable is disk-based).

    Raises
    ------
    ValueError
        If `columns` or `names` are not of a supported kind, if they are
        inconsistent with each other, if a name is not a valid field name,
        or if the columns do not all have the same length.
    """
    # Create the rootdir if necessary
    if self.rootdir:
        self.mkdir_rootdir(self.rootdir, self.mode)

    # An ndarray is only acceptable when it is structured (ratype case).
    # `dtype.names` exists on every dtype, so its value has to be tested.
    ratype = isinstance(columns, np.ndarray)
    if ratype and columns.dtype.names is None:
        raise ValueError("dtype should be structured")

    # Get the names of the columns
    if names is None:
        if ratype:
            names = list(columns.dtype.names)
        else:
            names = ["f%d" % i for i in range(len(columns))]
    else:
        if isinstance(names, tuple):
            names = list(names)
        if not isinstance(names, list):
            raise ValueError(
                "`names` can only be a list or tuple")
        if ratype:
            # len() of a structured array is its number of rows, so the
            # names are checked against the fields they select instead.
            fields = columns.dtype.names
            if any(name not in fields for name in names):
                raise ValueError(
                    "`names` must be fields of the structured `columns`")
        elif len(names) != len(columns):
            raise ValueError(
                "`columns` and `names` must have the same length")

    # Check names validity (namedtuple rejects invalid/duplicated names)
    names = list(namedtuple('_nt', names)._fields)

    # Guess the kind of columns input
    calist, nalist = False, False
    if ratype:
        if len(columns.shape) != 1:
            raise ValueError("only unidimensional shapes supported")
    elif isinstance(columns, (tuple, list)):
        # Exact type matches only; anything else goes through conversion
        calist = all(type(col) is carray for col in columns)
        nalist = all(type(col) is np.ndarray for col in columns)
    else:
        raise ValueError("`columns` input is not supported")
    if not (calist or nalist or ratype):
        # Try to convert the elements to carrays
        try:
            columns = [carray(col) for col in columns]
        except Exception:
            raise ValueError("`columns` input is not supported")
        calist = True

    # Populate the columns
    clen = -1
    for i, name in enumerate(names):
        if self.rootdir:
            # Put every carray under each own `name` subdirectory
            kwargs['rootdir'] = os.path.join(self.rootdir, name)
        if calist:
            column = columns[i]
            if self.rootdir:
                # Store this in destination
                column = column.copy(**kwargs)
        elif nalist:
            column = columns[i]
            if column.dtype == np.void:
                raise ValueError(
                    "`columns` elements cannot be of type void")
            column = carray(column, **kwargs)
        elif ratype:
            column = carray(columns[name], **kwargs)
        self.cols[name] = column
        if clen >= 0 and clen != len(column):
            raise ValueError("all `columns` must have the same length")
        clen = len(column)

    self.len = clen