class CacheArray(NotLoggedMixin, EArray, indexesextension.CacheArray):
    """Container for keeping index caches of 1st and 2nd level."""

    # Class identifier.
    _c_classid = 'CACHEARRAY'
    _c_classId = previous_api_property('_c_classid')

class MarkG(NotLoggedMixin, Group):

    # Class identifier.
    _c_classid = 'MARKG'
    _c_classId = previous_api_property('_c_classid')

    import re
    _c_shadow_name_re = re.compile(r'^a[0-9]+$')

    def _g_width_warning(self):
        warnings.warn("""\
mark ``%s`` is exceeding the recommended maximum action storage (%d nodes); \
be ready to see PyTables asking for *lots* of memory and possibly slow I/O"""
                      % (self._v_pathname, self._v_max_group_width),
                      PerformanceWarning)

    _g_widthWarning = previous_api(_g_width_warning)

    def _g_reset(self):
        """Empty action storage (nodes and attributes).

        This method empties all action storage kept in this node: nodes
        and attributes.

        """

        # Remove action storage nodes.
        for child in self._v_children.values():
            child._g_remove(True, True)

        # Remove action storage attributes.
        attrs = self._v_attrs
        shname = self._c_shadow_name_re
        for attrname in attrs._v_attrnamesuser[:]:
            if shname.match(attrname):
                attrs._g__delattr(attrname)

class LastRowArray(NotLoggedMixin, CArray, indexesextension.LastRowArray):
    """Container for keeping sorted and indices values of last row of an
    index."""

    # Class identifier.
    _c_classid = 'LASTROWARRAY'
    _c_classId = previous_api_property('_c_classid')

class TransactionGroupG(NotLoggedMixin, Group):

    _c_classid = 'TRANSGROUP'
    _c_classId = previous_api_property('_c_classid')

    def _g_width_warning(self):
        warnings.warn("""\
the number of transactions is exceeding the recommended maximum (%d); \
be ready to see PyTables asking for *lots* of memory and possibly slow I/O"""
                      % (self._v_max_group_width,), PerformanceWarning)

    _g_widthWarning = previous_api(_g_width_warning)

class ImageArray(Array):
    """Array containing an image.

    This class has no additional behaviour or functionality compared to
    that of an ordinary array.  It simply enables the user to open an
    ``IMAGE`` HDF5 node as a normal `Array` node in PyTables.

    """

    # Class identifier.
    _c_classid = 'IMAGE'
    _c_classId = previous_api_property('_c_classid')

class Unknown(Node):
    """This class represents nodes reported as *unknown* by the underlying
    HDF5 library.

    This class does not have any public instance variables or methods,
    except those inherited from the Node class.

    """

    # Class identifier
    _c_classid = 'UNKNOWN'
    _c_classId = previous_api_property('_c_classid')

    def __init__(self, parentnode, name):
        """Create the `Unknown` instance."""

        self._v_new = False
        super(Unknown, self).__init__(parentnode, name)

    def _g_new(self, parentnode, name, init=False):
        pass

    def _g_open(self):
        return 0

    def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs):
        # Silently avoid doing copies of unknown nodes.
        return None

    def _g_delete(self, parent):
        pass

    def __str__(self):
        pathname = self._v_pathname
        classname = self.__class__.__name__
        return "%s (%s)" % (pathname, classname)

    def __repr__(self):
        return """%s
  NOTE: <The Unknown object represents a node which is reported as
        unknown by the underlying HDF5 library, but that might be
        supported in more recent HDF5 versions.>
""" % (str(self))

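# A minimal usage sketch (kept as a comment so the module stays importable;
# paste into a separate script to run).  It shows how ``Unknown`` nodes
# surface when walking a file written with a newer HDF5.  The file name
# ``legacy.h5`` is hypothetical, and the sketch assumes ``Unknown`` is
# re-exported from the top-level ``tables`` namespace.
#
#     import tables
#
#     with tables.open_file('legacy.h5') as f:
#         for node in f.walk_nodes('/'):
#             if isinstance(node, tables.Unknown):
#                 print('skipping unknown node:', node._v_pathname)
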
class EArray(CArray):
    """This class represents extendable, homogeneous datasets in an HDF5
    file.

    The main difference between an EArray and a CArray (see
    :ref:`CArrayClassDescr`), from which it inherits, is that the former
    can be enlarged along one of its dimensions, the *enlargeable
    dimension*.  That means that the :attr:`Leaf.extdim` attribute (see
    :class:`Leaf`) of any EArray instance will always be non-negative.
    Multiple enlargeable dimensions might be supported in the future.

    New rows can be added to the end of an enlargeable array by using the
    :meth:`EArray.append` method.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An `Atom` instance representing the *type* and *shape* of the
        atomic objects to be saved.
    shape
        The shape of the new array.  One (and only one) of the shape
        dimensions *must* be 0.  The dimension being 0 means that the
        resulting `EArray` object can be extended along it.  Multiple
        enlargeable dimensions are not supported right now.
    title
        A description for this node (it sets the ``TITLE`` HDF5 attribute
        on disk).
    filters
        An instance of the `Filters` class that provides information about
        the desired I/O filters to be applied during the life of this
        object.
    expectedrows
        A user estimate about the number of row elements that will be
        added to the growable dimension in the `EArray` node.  If not
        provided, the default value is ``EXPECTED_ROWS_EARRAY`` (see
        ``tables/parameters.py``).  If you plan to create either a much
        smaller or a much bigger `EArray` try providing a guess; this will
        optimize the HDF5 B-Tree creation and management process time and
        the amount of memory used.
    chunkshape
        The shape of the data chunk to be read or written in a single
        HDF5 I/O operation.  Filters are applied to those chunks of data.
        The dimensionality of `chunkshape` must be the same as that of
        `shape` (beware: no dimension should be 0 this time!).  If
        ``None``, a sensible value is calculated based on the
        `expectedrows` parameter (which is recommended).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or
        'big'.  If this is not specified, the byteorder is that of the
        platform.

    Examples
    --------
    See below a small example of the use of the `EArray` class.  The code
    is available in ``examples/earray1.py``::

        import tables
        import numpy

        fileh = tables.open_file('earray1.h5', mode='w')
        a = tables.StringAtom(itemsize=8)

        # Use ``a`` as the object type for the enlargeable array.
        array_c = fileh.create_earray(fileh.root, 'array_c', a, (0,),
                                      "Chars")
        array_c.append(numpy.array(['a' * 2, 'b' * 4], dtype='S8'))
        array_c.append(numpy.array(['a' * 6, 'b' * 8, 'c' * 10],
                                   dtype='S8'))

        # Read the string ``EArray`` we have created on disk.
        for s in array_c:
            print('array_c[%s] => %r' % (array_c.nrow, s))

        # Close the file.
        fileh.close()

    The output for the previous script is something like::

        array_c[0] => 'aa'
        array_c[1] => 'bbbb'
        array_c[2] => 'aaaaaa'
        array_c[3] => 'bbbbbbbb'
        array_c[4] => 'cccccccc'

    """

    # Class identifier.
    _c_classid = 'EARRAY'
    _c_classId = previous_api_property('_c_classid')

    # Special methods
    # ~~~~~~~~~~~~~~~
    def __init__(self, parentnode, name,
                 atom=None, shape=None, title="",
                 filters=None, expectedrows=None,
                 chunkshape=None, byteorder=None,
                 _log=True):

        # Specific of EArray
        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_EARRAY']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the array."""

        # Call the parent (CArray) init code
        super(EArray, self).__init__(parentnode, name, atom, shape, title,
                                     filters, chunkshape, byteorder, _log)

    # Public and private methods
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~
    def _g_create(self):
        """Create a new array in file (specific part)."""

        # Pre-conditions and extdim computation
        zerodims = numpy.sum(numpy.array(self.shape) == 0)
        if zerodims > 0:
            if zerodims == 1:
                self.extdim = list(self.shape).index(0)
            else:
                raise NotImplementedError(
                    "Multiple enlargeable (0-)dimensions are not "
                    "supported.")
        else:
            raise ValueError("When creating EArrays, you need to set one of "
                             "the dimensions of the Atom instance to zero.")

        # Finish the common part of the creation process
        return self._g_create_common(self._v_expectedrows)

    def _check_shape_append(self, nparr):
        """Test that nparr shape is consistent with the underlying EArray."""

        # Does the array to be appended conform to this EArray's
        # expandability?
        myrank = len(self.shape)
        narank = len(nparr.shape) - len(self.atom.shape)
        if myrank != narank:
            raise ValueError(("the ranks of the appended object (%d) and "
                              "the ``%s`` EArray (%d) differ")
                             % (narank, self._v_pathname, myrank))
        for i in range(myrank):
            if i != self.extdim and self.shape[i] != nparr.shape[i]:
                raise ValueError(("the shapes of the appended object and "
                                  "the ``%s`` EArray differ in "
                                  "non-enlargeable dimension %d")
                                 % (self._v_pathname, i))

    _checkShapeAppend = previous_api(_check_shape_append)

    def append(self, sequence):
        """Add a sequence of data to the end of the dataset.

        The sequence must have the same type as the array; otherwise a
        TypeError is raised.  In the same way, the dimensions of the
        sequence must conform to the shape of the array, that is, all
        dimensions must match, with the exception of the enlargeable
        dimension, which can be of any length (even 0!).  If the shape of
        the sequence is invalid, a ValueError is raised.
""" self._g_check_open() self._v_file._check_writable() # Convert the sequence into a NumPy object nparr = convert_to_np_atom2(sequence, self.atom) # Check if it has a consistent shape with underlying EArray self._check_shape_append(nparr) # If the size of the nparr is zero, don't do anything else if nparr.size > 0: self._append(nparr) def _g_copy_with_stats(self, group, name, start, stop, step, title, filters, chunkshape, _log, **kwargs): """Private part of Leaf.copy() for each kind of leaf.""" (start, stop, step) = self._process_range_read(start, stop, step) # Build the new EArray object maindim = self.maindim shape = list(self.shape) shape[maindim] = 0 # The number of final rows nrows = len(xrange(start, stop, step)) # Build the new EArray object object = EArray(group, name, atom=self.atom, shape=shape, title=title, filters=filters, expectedrows=nrows, chunkshape=chunkshape, _log=_log) # Now, fill the new earray with values from source nrowsinbuf = self.nrowsinbuf # The slices parameter for self.__getitem__ slices = [slice(0, dim, 1) for dim in self.shape] # This is a hack to prevent doing unnecessary conversions # when copying buffers self._v_convert = False # Start the copy itself for start2 in xrange(start, stop, step * nrowsinbuf): # Save the records on disk stop2 = start2 + step * nrowsinbuf if stop2 > stop: stop2 = stop # Set the proper slice in the extensible dimension slices[maindim] = slice(start2, stop2, step) object._append(self.__getitem__(tuple(slices))) # Active the conversion again (default) self._v_convert = True nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.itemsize return (object, nbytes) _g_copyWithStats = previous_api(_g_copy_with_stats)
class VLArray(hdf5extension.VLArray, Leaf):
    """This class represents variable length (ragged) arrays in an HDF5
    file.

    Instances of this class represent array objects in the object tree
    with the property that their rows can have a *variable* number of
    homogeneous elements, called *atoms*.  Like Table datasets (see
    :ref:`TableClassDescr`), variable length arrays can have only one
    dimension, and the elements (atoms) of their rows can be fully
    multidimensional.

    When reading a range of rows from a VLArray, you will *always* get a
    Python list of objects of the current flavor (each of them for a row),
    which may have different lengths.

    This class provides methods to write or read data to or from variable
    length array objects in the file.  Note that it also inherits all the
    public attributes and methods that Leaf (see :ref:`LeafClassDescr`)
    already provides.

    .. note::

          VLArray objects also support compression although compression is
          only performed on the data structures used internally by the
          HDF5 to take references of the location of the variable length
          data.  The data itself (the raw data) is not compressed or
          filtered.

          Please refer to the `VLTypes Technical Note
          <http://www.hdfgroup.org/HDF5/doc/TechNotes/VLTypes.html>`_ for
          more details on the topic.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An `Atom` instance representing the *type* and *shape* of the
        atomic objects to be saved.
    title
        A description for this node (it sets the ``TITLE`` HDF5 attribute
        on disk).
    filters
        An instance of the `Filters` class that provides information about
        the desired I/O filters to be applied during the life of this
        object.
    expectedrows
        A user estimate about the number of row elements that will be
        added to the growable dimension in the `VLArray` node.  If not
        provided, the default value is ``EXPECTED_ROWS_VLARRAY`` (see
        ``tables/parameters.py``).  If you plan to create either a much
        smaller or a much bigger `VLArray` try providing a guess; this
        will optimize the HDF5 B-Tree creation and management process time
        and the amount of memory used.

        .. versionadded:: 3.0

    chunkshape
        The shape of the data chunk to be read or written in a single
        HDF5 I/O operation.  Filters are applied to those chunks of data.
        The dimensionality of `chunkshape` must be 1.  If ``None``, a
        sensible value is calculated (which is recommended).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or
        'big'.  If this is not specified, the byteorder is that of the
        platform.

    .. versionchanged:: 3.0
       The *expectedsizeinMB* parameter has been replaced by
       *expectedrows*.

    Examples
    --------
    See below a small example of the use of the VLArray class.  The code
    is available in :file:`examples/vlarray1.py`::

        import tables
        from numpy import *

        # Create a VLArray:
        fileh = tables.open_file('vlarray1.h5', mode='w')
        vlarray = fileh.create_vlarray(fileh.root, 'vlarray1',
                                       tables.Int32Atom(shape=()),
                                       "ragged array of ints",
                                       filters=tables.Filters(1))

        # Append some (variable length) rows:
        vlarray.append(array([5, 6]))
        vlarray.append(array([5, 6, 7]))
        vlarray.append([5, 6, 9, 8])

        # Now, read it through an iterator:
        print('-->', vlarray.title)
        for x in vlarray:
            print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, x))

        # Now, do the same with native Python strings.
        vlarray2 = fileh.create_vlarray(fileh.root, 'vlarray2',
                                        tables.StringAtom(itemsize=2),
                                        "ragged array of strings",
                                        filters=tables.Filters(1))
        vlarray2.flavor = 'python'

        # Append some (variable length) rows:
        print('-->', vlarray2.title)
        vlarray2.append(['5', '66'])
        vlarray2.append(['5', '6', '77'])
        vlarray2.append(['5', '6', '9', '88'])

        # Now, read it through an iterator:
        for x in vlarray2:
            print('%s[%d]--> %s' % (vlarray2.name, vlarray2.nrow, x))

        # Close the file.
        fileh.close()

    The output for the previous script is something like::

        --> ragged array of ints
        vlarray1[0]--> [5 6]
        vlarray1[1]--> [5 6 7]
        vlarray1[2]--> [5 6 9 8]
        --> ragged array of strings
        vlarray2[0]--> ['5', '66']
        vlarray2[1]--> ['5', '6', '77']
        vlarray2[2]--> ['5', '6', '9', '88']

    .. rubric:: VLArray attributes

    The instance variables below are provided in addition to those in
    Leaf (see :ref:`LeafClassDescr`).

    .. attribute:: atom

        An Atom (see :ref:`AtomClassDescr`) instance representing the
        *type* and *shape* of the atomic objects to be saved.  You may use
        a *pseudo-atom* for storing a serialized object or variable length
        string per row.

    .. attribute:: flavor

        The type of data object read from this leaf.  Please note that
        when reading several rows of VLArray data, the flavor only applies
        to the *components* of the returned Python list, not to the list
        itself.

    .. attribute:: nrow

        On iterators, this is the index of the current row.

    .. attribute:: nrows

        The current number of rows in the array.

    .. attribute:: extdim

        The index of the enlargeable dimension (always 0 for vlarrays).

    """

    # Class identifier.
    _c_classid = 'VLARRAY'
    _c_classId = previous_api_property('_c_classid')

    # Lazy read-only attributes
    # `````````````````````````
    @lazyattr
    def dtype(self):
        """The NumPy ``dtype`` that most closely matches this array."""
        return self.atom.dtype

    # Properties
    # ~~~~~~~~~~
    shape = property(lambda self: (self.nrows,), None, None,
                     "The shape of the stored array.")

    def _get_size_on_disk(self):
        raise NotImplementedError('size_on_disk not implemented for '
                                  'VLArrays')

    size_on_disk = property(
        _get_size_on_disk, None, None,
        """
        The HDF5 library does not include a function to determine
        size_on_disk for variable-length arrays.  Accessing this attribute
        will raise a NotImplementedError.
        """)

    size_in_memory = property(
        lambda self: self._get_memory_size(), None, None,
        """
        The size of this array's data in bytes when it is fully loaded
        into memory.

        .. note::

            When data is stored in a VLArray using the ObjectAtom type, it
            is first serialized using pickle, and then converted to a
            NumPy array suitable for storage in an HDF5 file.  This
            attribute will return the size of that NumPy representation.
            If you wish to know the size of the Python objects after they
            are loaded from disk, you can use this `ActiveState recipe
            <http://code.activestate.com/recipes/577504/>`_.
        """)

    # Other methods
    # ~~~~~~~~~~~~~
    def __init__(self, parentnode, name, atom=None, title="",
                 filters=None, expectedrows=None,
                 chunkshape=None, byteorder=None,
                 _log=True):

        self._v_version = None
        """The object version of this array."""
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._v_new_filters = filters
        """New filter properties for this array."""

        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_VLARRAY']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the array.

        .. versionadded:: 3.0

        """

        self._v_chunkshape = None
        """Private storage for the `chunkshape` property of Leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer."""
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        # Documented (*public*) attributes.
        self.atom = atom
        """
        An Atom (see :ref:`AtomClassDescr`) instance representing the
        *type* and *shape* of the atomic objects to be saved.  You may use
        a *pseudo-atom* for storing a serialized object or variable length
        string per row.
        """
        self.nrow = None
        """On iterators, this is the index of the current row."""
        self.nrows = None
        """The current number of rows in the array."""
        self.extdim = 0  # VLArrays only have one dimension currently
        """The index of the enlargeable dimension (always 0 for vlarrays)."""

        # Check the chunkshape parameter
        if new and chunkshape is not None:
            if isinstance(chunkshape, (int, numpy.integer, long)):
                chunkshape = (chunkshape,)
            try:
                chunkshape = tuple(chunkshape)
            except TypeError:
                raise TypeError(
                    "`chunkshape` parameter must be an integer or sequence "
                    "and you passed a %s" % type(chunkshape))
            if len(chunkshape) != 1:
                raise ValueError("`chunkshape` rank (length) must be 1: %r"
                                 % (chunkshape,))
            self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        super(VLArray, self).__init__(parentnode, name, new, filters,
                                      byteorder, _log)

    def _g_post_init_hook(self):
        super(VLArray, self)._g_post_init_hook()
        self.nrowsinbuf = 100  # maybe enough for most applications

    # This is too specific for moving it into Leaf
    def _calc_chunkshape(self, expectedrows):
        """Calculate the size for the HDF5 chunk."""

        # For computing the chunkshape for HDF5 VL types, we have to
        # choose the itemsize of *each* element of the atom and not the
        # size of the entire atom.  I don't know why this should be like
        # this, perhaps I should report this to the HDF5 list.
        # F. Alted 2006-11-23
        # elemsize = self.atom.atomsize()
        elemsize = self._basesize

        # AV 2013-05-03
        # This is just a quick workaround that allows changing the API for
        # the PyTables 3.0 release and removing the expected_mb parameter.
        # The algorithm for computing the chunkshape should be rewritten
        # as requested by gh-35.
        expected_mb = expectedrows * elemsize / 1024. ** 2
        chunksize = calc_chunksize(expected_mb)

        # Set the chunkshape
        chunkshape = chunksize // elemsize
        # Safeguard against itemsizes being extremely large
        if chunkshape == 0:
            chunkshape = 1
        return (SizeType(chunkshape),)

    def _g_create(self):
        """Create a variable length array (ragged array)."""

        atom = self.atom
        self._v_version = obversion

        # Check for zero dims in atom shape (not allowed in VLArrays)
        zerodims = numpy.sum(numpy.array(atom.shape) == 0)
        if zerodims > 0:
            raise ValueError("When creating VLArrays, none of the "
                             "dimensions of the Atom instance can be zero.")

        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            self._atomicdtype = atom.base.dtype
            self._atomicsize = atom.base.size
            self._basesize = atom.base.itemsize
        else:
            self._atomicdtype = atom.dtype
            self._atomicsize = atom.size
            self._basesize = atom.itemsize
        self._atomictype = atom.type
        self._atomicshape = atom.shape

        # Compute the optimal chunkshape, if needed
        if self._v_chunkshape is None:
            self._v_chunkshape = self._calc_chunkshape(self._v_expectedrows)

        self.nrows = SizeType(0)  # No rows at creation time

        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(atom.type, sys.byteorder)

        # After creating the vlarray, ``self._v_objectid`` needs to be
        # set because it is needed for setting attributes afterwards.
        self._v_objectid = self._create_array(self._v_new_title)

        # Add an attribute in case we have a pseudo-atom so that we
        # can retrieve the proper class after a re-opening operation.
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            self.attrs.PSEUDOATOM = atom.kind

        return self._v_objectid

    def _g_open(self):
        """Get the metadata info for an array in file."""

        self._v_objectid, self.nrows, self._v_chunkshape, atom = \
            self._open_array()

        # Check if the atom can be a PseudoAtom
        if "PSEUDOATOM" in self.attrs:
            kind = self.attrs.PSEUDOATOM
            if kind == 'vlstring':
                atom = VLStringAtom()
            elif kind == 'vlunicode':
                atom = VLUnicodeAtom()
            elif kind == 'object':
                atom = ObjectAtom()
            else:
                raise ValueError("pseudo-atom name ``%s`` not known." % kind)
        elif self._v_file.format_version[:1] == "1":
            flavor1x = self.attrs.FLAVOR
            if flavor1x == "VLString":
                atom = VLStringAtom()
            elif flavor1x == "Object":
                atom = ObjectAtom()

        self.atom = atom
        return self._v_objectid

    def _getnobjects(self, nparr):
        """Return the number of objects in a NumPy array."""

        # Check for zero dimensionality array
        zerodims = numpy.sum(numpy.array(nparr.shape) == 0)
        if zerodims > 0:
            # No objects to be added
            return 0
        shape = nparr.shape
        atom_shape = self.atom.shape
        shapelen = len(nparr.shape)
        if isinstance(atom_shape, tuple):
            atomshapelen = len(self.atom.shape)
        else:
            atom_shape = (self.atom.shape,)
            atomshapelen = 1
        diflen = shapelen - atomshapelen
        if shape == atom_shape:
            nobjects = 1
        elif (diflen == 1 and shape[diflen:] == atom_shape):
            # Check if the leading dimensions are all ones
            # if shape[:diflen-1] == (1,)*(diflen-1):
            #     nobjects = shape[diflen-1]
            #     shape = shape[diflen:]
            # It's better to accept only inputs with the exact
            # dimensionality, i.e. a dimensionality only 1 element larger
            # than atom
            nobjects = shape[0]
            shape = shape[1:]
        elif atom_shape == (1,) and shapelen == 1:
            # Case where shape = (N,) and shape_atom = 1 or (1,)
            nobjects = shape[0]
        else:
            raise ValueError("The object '%s' is composed of elements with "
                             "shape '%s', which is not compatible with the "
                             "atom shape ('%s')." % (nparr, shape,
                                                     atom_shape))
        return nobjects

    def get_enum(self):
        """Get the enumerated type associated with this array.
        If this array is of an enumerated type, the corresponding Enum
        instance (see :ref:`EnumClassDescr`) is returned.  If it is not of
        an enumerated type, a TypeError is raised.

        """

        if self.atom.kind != 'enum':
            raise TypeError("array ``%s`` is not of an enumerated type"
                            % self._v_pathname)

        return self.atom.enum

    getEnum = previous_api(get_enum)

    def append(self, sequence):
        """Add a sequence of data to the end of the dataset.

        This method appends the objects in the sequence to a *single row*
        in this array.  The type and shape of individual objects must be
        compliant with the atoms in the array.  In the case of serialized
        objects and variable length strings, the object or string to
        append is itself the sequence.

        """

        self._g_check_open()
        self._v_file._check_writable()

        # Prepare the sequence to convert it into a NumPy object
        atom = self.atom
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            sequence = atom.toarray(sequence)
            statom = atom.base
        else:
            try:  # fastest check in most cases
                len(sequence)
            except TypeError:
                raise TypeError("argument is not a sequence")
            statom = atom

        if len(sequence) > 0:
            # The sequence needs to be copied to make the operation safe
            # to in-place conversion.
            nparr = convert_to_np_atom2(sequence, statom)
            nobjects = self._getnobjects(nparr)
        else:
            nobjects = 0
            nparr = None

        self._append(nparr, nobjects)
        self.nrows += 1

    def iterrows(self, start=None, stop=None, step=None):
        """Iterate over the rows of the array.

        This method returns an iterator yielding an object of the current
        flavor for each selected row in the array.

        If a range is not supplied, *all the rows* in the array are
        iterated upon.  You can also use the :meth:`VLArray.__iter__`
        special method for that purpose.

        If you only want to iterate over a given *range of rows* in the
        array, you may use the start, stop and step parameters.

        Examples
        --------
        ::

            for row in vlarray.iterrows(step=4):
                print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, row))

        .. versionchanged:: 3.0
           If the *start* parameter is provided and *stop* is None then
           the array is iterated from *start* to the last line.  In
           PyTables < 3.0 only one element was returned.

        """

        (self._start, self._stop, self._step) = self._process_range(
            start, stop, step)
        self._init_loop()
        return self

    def __iter__(self):
        """Iterate over the rows of the array.

        This is equivalent to calling :meth:`VLArray.iterrows` with
        default arguments, i.e. it iterates over *all the rows* in the
        array.

        Examples
        --------
        ::

            result = [row for row in vlarray]

        Which is equivalent to::

            result = [row for row in vlarray.iterrows()]

        """

        if not self._init:
            # If the iterator is called directly, assign default variables
            self._start = 0
            self._stop = self.nrows
            self._step = 1
            # and initialize the loop
            self._init_loop()
        return self

    def _init_loop(self):
        """Initialization for the __iter__ iterator."""

        self._nrowsread = self._start
        self._startb = self._start
        self._row = -1  # Sentinel
        self._init = True  # Sentinel
        self.nrow = SizeType(self._start - self._step)  # row number

    _initLoop = previous_api(_init_loop)

    def next(self):
        """Get the next element of the array during an iteration.

        The element is returned as a list of objects of the current
        flavor.
""" if self._nrowsread >= self._stop: self._init = False raise StopIteration # end of iteration else: # Read a chunk of rows if self._row + 1 >= self.nrowsinbuf or self._row < 0: self._stopb = self._startb + self._step * self.nrowsinbuf self.listarr = self.read(self._startb, self._stopb, self._step) self._row = -1 self._startb = self._stopb self._row += 1 self.nrow += self._step self._nrowsread += self._step return self.listarr[self._row] def __getitem__(self, key): """Get a row or a range of rows from the array. If key argument is an integer, the corresponding array row is returned as an object of the current flavor. If key is a slice, the range of rows determined by it is returned as a list of objects of the current flavor. In addition, NumPy-style point selections are supported. In particular, if key is a list of row coordinates, the set of rows determined by it is returned. Furthermore, if key is an array of boolean values, only the coordinates where key is True are returned. Note that for the latter to work it is necessary that key list would contain exactly as many rows as the array has. Examples -------- :: a_row = vlarray[4] a_list = vlarray[4:1000:2] a_list2 = vlarray[[0,2]] # get list of coords a_list3 = vlarray[[0,-2]] # negative values accepted a_list4 = vlarray[numpy.array([True,...,False])] # array of bools """ self._g_check_open() if is_idx(key): key = operator.index(key) # Index out of range protection if key >= self.nrows: raise IndexError("Index out of range") if key < 0: # To support negative values key += self.nrows (start, stop, step) = self._process_range(key, key + 1, 1) return self.read(start, stop, step)[0] elif isinstance(key, slice): start, stop, step = self._process_range(key.start, key.stop, key.step) return self.read(start, stop, step) # Try with a boolean or point selection elif type(key) in (list, tuple) or isinstance(key, numpy.ndarray): coords = self._point_selection(key) return self._read_coordinates(coords) else: raise IndexError("Invalid index or slice: %r" % (key, )) def _assign_values(self, coords, values): """Assign the `values` to the positions stated in `coords`.""" for nrow, value in zip(coords, values): if nrow >= self.nrows: raise IndexError("First index out of range") if nrow < 0: # To support negative values nrow += self.nrows object_ = value # Prepare the object to convert it into a NumPy object atom = self.atom if not hasattr(atom, 'size'): # it is a pseudo-atom object_ = atom.toarray(object_) statom = atom.base else: statom = atom value = convert_to_np_atom(object_, statom) nobjects = self._getnobjects(value) # Get the previous value nrow = idx2long(nrow) # To convert any possible numpy scalar value nparr = self._read_array(nrow, nrow + 1, 1)[0] nobjects = len(nparr) if len(value) > nobjects: raise ValueError("Length of value (%s) is larger than number " "of elements in row (%s)" % (len(value), nobjects)) try: nparr[:] = value except Exception as exc: # XXX raise ValueError("Value parameter:\n'%r'\n" "cannot be converted into an array object " "compliant vlarray[%s] row: \n'%r'\n" "The error was: <%s>" % (value, nrow, nparr[:], exc)) if nparr.size > 0: self._modify(nrow, nparr, nobjects) def __setitem__(self, key, value): """Set a row, or set of rows, in the array. It takes different actions depending on the type of the *key* parameter: if it is an integer, the corresponding table row is set to *value* (a record or sequence capable of being converted to the table structure). 
        If *key* is a slice, the row slice determined by it is set to
        *value* (a record array or sequence of rows capable of being
        converted to the table structure).

        In addition, NumPy-style point selections are supported.  In
        particular, if key is a list of row coordinates, the set of rows
        determined by it is set to value.  Furthermore, if key is an array
        of boolean values, only the coordinates where key is True are set
        to values from value.  Note that for the latter to work, the key
        must be a boolean array with exactly as many entries as the table
        has rows.

        .. note::

            When updating the rows of a VLArray object which uses a
            pseudo-atom, there is a problem: you can only update values
            with *exactly* the same size in bytes as the original row.
            This is very difficult to meet with object pseudo-atoms,
            because :mod:`pickle` applied on a Python object does not
            guarantee to return the same number of bytes as for another
            object, even if they are of the same class.  This effectively
            limits the kinds of objects that can be updated in
            variable-length arrays.

        Examples
        --------
        ::

            vlarray[0] = vlarray[0] * 2 + 3
            vlarray[99] = arange(96) * 2 + 3

            # Negative values for the index are supported.
            vlarray[-99] = vlarray[5] * 2 + 3
            vlarray[1:30:2] = list_of_rows
            vlarray[[1, 3]] = new_1_and_3_rows

        """

        self._g_check_open()
        self._v_file._check_writable()

        if is_idx(key):
            # If key is not a sequence, convert to it
            coords = [key]
            value = [value]
        elif isinstance(key, slice):
            (start, stop, step) = self._process_range(key.start, key.stop,
                                                      key.step)
            coords = range(start, stop, step)
        # Try with a boolean or point selection
        elif type(key) in (list, tuple) or isinstance(key, numpy.ndarray):
            coords = self._point_selection(key)
        else:
            raise IndexError("Invalid index or slice: %r" % (key,))

        # Do the assignment row by row
        self._assign_values(coords, value)

    # Accessor for the _read_array method in superclass
    def read(self, start=None, stop=None, step=1):
        """Get data in the array as a list of objects of the current
        flavor.

        Please note that, as the lengths of the different rows are
        variable, the returned value is a *Python list* (not an array of
        the current flavor), with as many entries as specified rows in the
        range parameters.

        The start, stop and step parameters can be used to select only a
        *range of rows* in the array.  Their meanings are the same as in
        the built-in range() Python function, except that negative values
        of step are not allowed yet.  Moreover, if only start is
        specified, then stop will be set to start + 1.  If you specify
        neither start nor stop, then *all the rows* in the array are
        selected.
""" self._g_check_open() start, stop, step = self._process_range_read(start, stop, step) if start == stop: listarr = [] else: listarr = self._read_array(start, stop, step) atom = self.atom if not hasattr(atom, 'size'): # it is a pseudo-atom outlistarr = [atom.fromarray(arr) for arr in listarr] else: # Convert the list to the right flavor flavor = self.flavor outlistarr = [internal_to_flavor(arr, flavor) for arr in listarr] return outlistarr def _read_coordinates(self, coords): """Read rows specified in `coords`.""" rows = [] for coord in coords: rows.append(self.read(long(coord))[0]) return rows def _g_copy_with_stats(self, group, name, start, stop, step, title, filters, chunkshape, _log, **kwargs): """Private part of Leaf.copy() for each kind of leaf.""" # Build the new VLArray object object = VLArray(group, name, self.atom, title=title, filters=filters, expectedrows=self._v_expectedrows, chunkshape=chunkshape, _log=_log) # Now, fill the new vlarray with values from the old one # This is not buffered because we cannot forsee the length # of each record. So, the safest would be a copy row by row. # In the future, some analysis can be done in order to buffer # the copy process. nrowsinbuf = 1 (start, stop, step) = self._process_range_read(start, stop, step) # Optimized version (no conversions, no type and shape checks, etc...) nrowscopied = SizeType(0) nbytes = 0 if not hasattr(self.atom, 'size'): # it is a pseudo-atom atomsize = self.atom.base.size else: atomsize = self.atom.size for start2 in xrange(start, stop, step * nrowsinbuf): # Save the records on disk stop2 = start2 + step * nrowsinbuf if stop2 > stop: stop2 = stop nparr = self._read_array(start=start2, stop=stop2, step=step)[0] nobjects = nparr.shape[0] object._append(nparr, nobjects) nbytes += nobjects * atomsize nrowscopied += 1 object.nrows = nrowscopied return (object, nbytes) _g_copyWithStats = previous_api(_g_copy_with_stats) def __repr__(self): """This provides more metainfo in addition to standard __str__""" return """%s atom = %r byteorder = %r nrows = %s flavor = %r""" % (self, self.atom, self.byteorder, self.nrows, self.flavor)
class CArray(Array):
    """This class represents homogeneous datasets in an HDF5 file.

    The difference between a CArray and a normal Array (see
    :ref:`ArrayClassDescr`), from which it inherits, is that a CArray has
    a chunked layout and, as a consequence, it supports compression.  You
    can use datasets of this class to easily save or load arrays to or
    from disk, with compression support included.

    CArray includes all the instance variables and methods of Array.  Only
    those with different behavior are mentioned here.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An `Atom` instance representing the *type* and *shape* of the
        atomic objects to be saved.
    shape
        The shape of the new array.
    title
        A description for this node (it sets the ``TITLE`` HDF5 attribute
        on disk).
    filters
        An instance of the `Filters` class that provides information about
        the desired I/O filters to be applied during the life of this
        object.
    chunkshape
        The shape of the data chunk to be read or written in a single
        HDF5 I/O operation.  Filters are applied to those chunks of data.
        The dimensionality of `chunkshape` must be the same as that of
        `shape`.  If ``None``, a sensible value is calculated (which is
        recommended).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or
        'big'.  If this is not specified, the byteorder is that of the
        platform.

    Examples
    --------
    See below a small example of the use of the `CArray` class.  The code
    is available in ``examples/carray1.py``::

        import numpy
        import tables

        fileName = 'carray1.h5'
        shape = (200, 300)
        atom = tables.UInt8Atom()
        filters = tables.Filters(complevel=5, complib='zlib')

        h5f = tables.open_file(fileName, 'w')
        ca = h5f.create_carray(h5f.root, 'carray', atom, shape,
                               filters=filters)

        # Fill a hyperslab in ``ca``.
        ca[10:60, 20:70] = numpy.ones((50, 50))
        h5f.close()

        # Re-open and read another hyperslab.
        h5f = tables.open_file(fileName)
        print(h5f)
        print(h5f.root.carray[8:12, 18:22])
        h5f.close()

    The output for the previous script is something like::

        carray1.h5 (File) ''
        Last modif.: 'Thu Apr 12 10:15:38 2007'
        Object Tree:
        / (RootGroup) ''
        /carray (CArray(200, 300), shuffle, zlib(5)) ''

        [[0 0 0 0]
         [0 0 0 0]
         [0 0 1 1]
         [0 0 1 1]]

    """

    # Class identifier.
    _c_classid = 'CARRAY'
    _c_classId = previous_api_property('_c_classid')

    # Properties
    # ~~~~~~~~~~

    # Special methods
    # ~~~~~~~~~~~~~~~
    def __init__(self, parentnode, name,
                 atom=None, shape=None,
                 title="", filters=None,
                 chunkshape=None, byteorder=None,
                 _log=True):

        self.atom = atom
        """An `Atom` instance representing the shape and type of the
        atomic objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.extdim = -1  # `CArray` objects are not enlargeable by default
        """The index of the enlargeable dimension."""

        # Other private attributes
        self._v_version = None
        """The object version of this array."""
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""
        self._v_chunkshape = chunkshape
        """Private storage for the `chunkshape` property of the leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer."""
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        if new:
            if not isinstance(atom, Atom):
                raise ValueError("atom parameter should be an instance of "
                                 "tables.Atom and you passed a %s."
                                 % type(atom))
            if shape is None:
                raise ValueError("you must specify a non-empty shape")
            try:
                shape = tuple(shape)
            except TypeError:
                raise TypeError("`shape` parameter must be a sequence "
                                "and you passed a %s" % type(shape))
            self.shape = tuple(SizeType(s) for s in shape)

            if chunkshape is not None:
                try:
                    chunkshape = tuple(chunkshape)
                except TypeError:
                    raise TypeError(
                        "`chunkshape` parameter must be a sequence "
                        "and you passed a %s" % type(chunkshape))
                if len(shape) != len(chunkshape):
                    raise ValueError("the shape (%s) and chunkshape (%s) "
                                     "ranks must be equal." % (shape,
                                                               chunkshape))
                elif min(chunkshape) < 1:
                    raise ValueError("chunkshape parameter cannot have "
                                     "zero-dimensions.")
                self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        # The `Array` class is not abstract enough! :(
        super(Array, self).__init__(parentnode, name, new, filters,
                                    byteorder, _log)

    def _g_create(self):
        """Create a new array in file (specific part)."""

        if min(self.shape) < 1:
            raise ValueError("shape parameter cannot have zero-dimensions.")
        # Finish the common part of the creation process
        return self._g_create_common(self.nrows)

    def _g_create_common(self, expectedrows):
        """Create a new array in file (common part)."""

        self._v_version = obversion

        if self._v_chunkshape is None:
            # Compute the optimal chunk size
            self._v_chunkshape = self._calc_chunkshape(
                expectedrows, self.rowsize, self.atom.size)
        # Compute the optimal nrowsinbuf
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(self.atom.type,
                                               sys.byteorder)

        try:
            # ``self._v_objectid`` needs to be set because it will be
            # needed for setting attributes in some descendants later on.
            self._v_objectid = self._create_carray(self._v_new_title)
        except:  # XXX
            # Problems creating the Array on disk.  Close node and
            # re-raise.
            self.close(flush=0)
            raise

        return self._v_objectid

    def _g_copy_with_stats(self, group, name, start, stop, step,
                           title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf."""

        (start, stop, step) = self._process_range_read(start, stop, step)
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = len(xrange(0, stop - start, step))
        # Now, fill the new carray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        # Build the new CArray object
        object = CArray(group, name, atom=self.atom, shape=shape,
                        title=title, filters=filters,
                        chunkshape=chunkshape, _log=_log)
        # Start the copy itself
        for start2 in xrange(start, stop, step * nrowsinbuf):
            # Save the records on disk
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # The next line should be generalised if, in the future,
            # maindim is designed to be different from 0 in CArrays.
            # See ticket #199.
            object[start3:stop3] = self.__getitem__(tuple(slices))
        # Activate the conversion again (default)
        self._v_convert = True
        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size

        return (object, nbytes)

    _g_copyWithStats = previous_api(_g_copy_with_stats)

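# A minimal usage sketch (kept as a comment so the module stays importable;
# paste into a separate script to run).  A CArray has a fixed shape but a
# chunked, compressible layout.  The file name ``demo_carray.h5`` is
# hypothetical.
#
#     import numpy
#     import tables
#
#     with tables.open_file('demo_carray.h5', mode='w') as f:
#         ca = f.create_carray(f.root, 'ca', tables.Float32Atom(),
#                              (100, 100),
#                              filters=tables.Filters(complevel=5,
#                                                     complib='zlib'))
#         ca[:50, :50] = numpy.random.rand(50, 50)  # write one hyperslab
#         print(ca.chunkshape)  # chunk shape chosen automatically
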
class Array(hdf5extension.Array, Leaf):
    """This class represents homogeneous datasets in an HDF5 file.

    This class provides methods to write or read data to or from array
    objects in the file.  This class does not allow you to enlarge or
    compress the datasets on disk; use the EArray class (see
    :ref:`EArrayClassDescr`) if you want enlargeable dataset support or
    compression features, or CArray (see :ref:`CArrayClassDescr`) if you
    just want compression.

    An interesting property of the Array class is that it remembers the
    *flavor* of the object that has been saved so that if you saved, for
    example, a list, you will get a list during readings afterwards; if
    you saved a NumPy array, you will get a NumPy object, and so forth.

    Note that this class inherits all the public attributes and methods
    that Leaf (see :ref:`LeafClassDescr`) already provides.  However, as
    Array instances have no internal I/O buffers, it is not necessary to
    use the flush() method they inherit from Leaf in order to save their
    internal state to disk.  When a writing method call returns, all the
    data is already on disk.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    obj
        The array or scalar to be saved.  Accepted types are NumPy arrays
        and scalars as well as native Python sequences and scalars,
        provided that values are regular (i.e. they are not like
        ``[[1, 2], 2]``) and homogeneous (i.e. all the elements are of the
        same type).

        .. versionchanged:: 3.0
           Renamed from *object* to *obj*.

    title
        A description for this node (it sets the ``TITLE`` HDF5 attribute
        on disk).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or
        'big'.  If this is not specified, the byteorder is that of the
        given `object`.

    """

    # Class identifier.
    _c_classid = 'ARRAY'
    _c_classId = previous_api_property('_c_classid')
    _v_objectId = previous_api_property('_v_objectid')

    # Lazy read-only attributes
    # `````````````````````````
    @lazyattr
    def dtype(self):
        """The NumPy ``dtype`` that most closely matches this array."""
        return self.atom.dtype

    # Properties
    # ~~~~~~~~~~
    def _getnrows(self):
        if self.shape == ():
            return SizeType(1)  # scalar case
        else:
            return self.shape[self.maindim]

    nrows = property(_getnrows, None, None,
                     "The number of rows in the array.")

    def _getrowsize(self):
        maindim = self.maindim
        rowsize = self.atom.size
        for i, dim in enumerate(self.shape):
            if i != maindim:
                rowsize *= dim
        return rowsize

    rowsize = property(
        _getrowsize, None, None,
        "The size of the rows in bytes in dimensions orthogonal to "
        "*maindim*.")

    size_in_memory = property(
        lambda self: self.nrows * self.rowsize, None, None,
        """The size of this array's data in bytes when it is fully loaded
        into memory.""")

    # Other methods
    # ~~~~~~~~~~~~~
    def __init__(self, parentnode, name,
                 obj=None, title="",
                 byteorder=None, _log=True, _atom=None):

        self._v_version = None
        """The object version of this array."""
        self._v_new = new = obj is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._obj = obj
        """The object to be stored in the array.  It can be any of numpy,
        list, tuple, string, integer or floating point types, provided
        that they are regular (i.e. they are not like ``[[1, 2], 2]``).

        .. versionchanged:: 3.0
           Renamed from *_object* to *_obj*.

        """
        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer."""
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        # Documented (*public*) attributes.
        self.atom = _atom
        """An Atom (see :ref:`AtomClassDescr`) instance representing the
        *type* and *shape* of the atomic objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.nrow = None
        """On iterators, this is the index of the current row."""
        self.extdim = -1  # ordinary arrays are not enlargeable
        """The index of the enlargeable dimension."""

        # Ordinary arrays have no filters: leaf is created with default
        # ones.
        super(Array, self).__init__(parentnode, name, new, Filters(),
                                    byteorder, _log)

    def _g_create(self):
        """Save a new array in file."""

        self._v_version = obversion
        try:
            # `Leaf._g_post_init_hook()` should be setting the flavor on
            # disk.
            self._flavor = flavor = flavor_of(self._obj)
            nparr = array_as_internal(self._obj, flavor)
        except:  # XXX
            # Problems converting data.  Close the node and re-raise the
            # exception.
            self.close(flush=0)
            raise

        # Raise an error in case of unsupported object
        if nparr.dtype.kind in ['V', 'U', 'O']:  # void, unicode, object
            raise TypeError("Array objects cannot currently deal with "
                            "void, unicode or object arrays")

        # Decrease the number of references to the object
        self._obj = None

        # Fix the byteorder of data
        nparr = self._g_fix_byteorder_data(nparr, nparr.dtype.byteorder)

        # Create the array on-disk
        try:
            # ``self._v_objectid`` needs to be set because it will be
            # needed for setting attributes in some descendants later on.
            (self._v_objectid, self.shape, self.atom) = self._create_array(
                nparr, self._v_new_title, self.atom)
        except:  # XXX
            # Problems creating the Array on disk.  Close node and
            # re-raise.
            self.close(flush=0)
            raise

        # Compute the optimal buffer size
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Arrays don't have chunkshapes (so, set it to None)
        self._v_chunkshape = None

        return self._v_objectid

    def _g_open(self):
        """Get the metadata info for an array in file."""

        (oid, self.atom, self.shape, self._v_chunkshape) = \
            self._open_array()

        self.nrowsinbuf = self._calc_nrowsinbuf()

        return oid

    def get_enum(self):
        """Get the enumerated type associated with this array.

        If this array is of an enumerated type, the corresponding Enum
        instance (see :ref:`EnumClassDescr`) is returned.  If it is not of
        an enumerated type, a TypeError is raised.

        """

        if self.atom.kind != 'enum':
            raise TypeError("array ``%s`` is not of an enumerated type"
                            % self._v_pathname)

        return self.atom.enum

    getEnum = previous_api(get_enum)

    def iterrows(self, start=None, stop=None, step=None):
        """Iterate over the rows of the array.

        This method returns an iterator yielding an object of the current
        flavor for each selected row in the array.  The returned rows are
        taken from the *main dimension*.

        If a range is not supplied, *all the rows* in the array are
        iterated upon - you can also use the :meth:`Array.__iter__`
        special method for that purpose.

        If you only want to iterate over a given *range of rows* in the
        array, you may use the start, stop and step parameters.
        Examples
        --------
        ::

            result = [row for row in arrayInstance.iterrows(step=4)]

        .. versionchanged:: 3.0
           If the *start* parameter is provided and *stop* is None then
           the array is iterated from *start* to the last line.  In
           PyTables < 3.0 only one element was returned.

        """

        try:
            (self._start, self._stop, self._step) = self._process_range(
                start, stop, step)
        except IndexError:
            # If problems with indexes, silently return the null tuple
            return ()
        self._init_loop()
        return self

    def __iter__(self):
        """Iterate over the rows of the array.

        This is equivalent to calling :meth:`Array.iterrows` with default
        arguments, i.e. it iterates over *all the rows* in the array.

        Examples
        --------
        ::

            result = [row[2] for row in array]

        Which is equivalent to::

            result = [row[2] for row in array.iterrows()]

        """

        if not self._init:
            # If the iterator is called directly, assign default variables
            self._start = 0
            self._stop = self.nrows
            self._step = 1
            # and initialize the loop
            self._init_loop()
        return self

    def _init_loop(self):
        """Initialization for the __iter__ iterator."""

        self._nrowsread = self._start
        self._startb = self._start
        self._row = -1  # Sentinel
        self._init = True  # Sentinel
        self.nrow = SizeType(self._start - self._step)  # row number

    _initLoop = previous_api(_init_loop)

    def next(self):
        """Get the next element of the array during an iteration.

        The element is returned as an object of the current flavor.

        """

        # This could probably be sped up for long iterations by reusing
        # the listarr buffer.
        if self._nrowsread >= self._stop:
            self._init = False
            self.listarr = None  # fixes issue #308
            raise StopIteration  # end of iteration
        else:
            # Read a chunk of rows
            if self._row + 1 >= self.nrowsinbuf or self._row < 0:
                self._stopb = self._startb + self._step * self.nrowsinbuf
                # Protection for reading more elements than needed
                if self._stopb > self._stop:
                    self._stopb = self._stop
                listarr = self._read(self._startb, self._stopb, self._step)
                # Swap the axes to ease the return of elements
                if self.extdim > 0:
                    listarr = listarr.swapaxes(self.extdim, 0)
                self.listarr = internal_to_flavor(listarr, self.flavor)
                self._row = -1
                self._startb = self._stopb
            self._row += 1
            self.nrow += self._step
            self._nrowsread += self._step
            # Fixes bug #968132
            # if self.listarr.shape:
            if self.shape:
                return self.listarr[self._row]
            else:
                return self.listarr  # Scalar case

    def _interpret_indexing(self, keys):
        """Internal routine used by __getitem__ and __setitem__."""

        maxlen = len(self.shape)
        shape = (maxlen,)
        startl = numpy.empty(shape=shape, dtype=SizeType)
        stopl = numpy.empty(shape=shape, dtype=SizeType)
        stepl = numpy.empty(shape=shape, dtype=SizeType)
        stop_None = numpy.zeros(shape=shape, dtype=SizeType)
        if not isinstance(keys, tuple):
            keys = (keys,)
        nkeys = len(keys)
        dim = 0
        # There is a problem when dealing with [..., ...]
        # params, but this is a bit weird way to pass parameters anyway
        for key in keys:
            ellipsis = 0  # Sentinel
            if isinstance(key, type(Ellipsis)):
                ellipsis = 1
                for diml in xrange(dim, len(self.shape) - (nkeys - dim) + 1):
                    startl[dim] = 0
                    stopl[dim] = self.shape[diml]
                    stepl[dim] = 1
                    dim += 1
            elif dim >= maxlen:
                raise IndexError("Too many indices for object '%s'"
                                 % self._v_pathname)
            elif is_idx(key):
                # Protection for index out of range
                if key >= self.shape[dim]:
                    raise IndexError("Index out of range")
                if key < 0:
                    # To support negative values (Fixes bug #968149)
                    key += self.shape[dim]
                start, stop, step = self._process_range(
                    key, key + 1, 1, dim=dim)
                stop_None[dim] = 1
            elif isinstance(key, slice):
                start, stop, step = self._process_range(
                    key.start, key.stop, key.step, dim=dim)
            else:
                raise TypeError("Non-valid index or slice: %s" % key)
            if not ellipsis:
                startl[dim] = start
                stopl[dim] = stop
                stepl[dim] = step
                dim += 1

        # Complete the other dimensions, if needed
        if dim < len(self.shape):
            for diml in xrange(dim, len(self.shape)):
                startl[dim] = 0
                stopl[dim] = self.shape[diml]
                stepl[dim] = 1
                dim += 1

        # Compute the shape for the container properly.  Fixes #1288792
        shape = []
        for dim in xrange(len(self.shape)):
            # Negative division operates differently with python scalars
            # and numpy scalars (which follow C conventions).  See:
            # http://www.python.org/doc/faq/programming.html#why-does-22-10-return-3
            # and
            # http://www.peterbe.com/Integer-division-in-programming-languages
            # for more info on this issue.
            # I've finally decided to rely on the len(xrange) function.
            # F. Alted 2006-09-25
            # Switch to `lrange` to allow long ranges (see #99).
            # Use xrange, since it supports large integers as of Python
            # 2.6.  See github #181.
            new_dim = len(xrange(startl[dim], stopl[dim], stepl[dim]))
            if not (new_dim == 1 and stop_None[dim]):
                shape.append(new_dim)

        return startl, stopl, stepl, shape

    def _fancy_selection(self, args):
        """Perform a NumPy-style fancy selection in `self`.

        Implements advanced NumPy-style selection operations in addition
        to the standard slice-and-int behavior.

        Indexing arguments may be ints, slices or lists of indices.

        Note: this is a backport from the h5py project.

        """

        # Internal functions

        def validate_number(num, length):
            """Validate a list member for the given axis length."""

            try:
                num = long(num)
            except TypeError:
                raise TypeError("Illegal index: %r" % num)
            if num > length - 1:
                raise IndexError("Index out of bounds: %d" % num)

        def expand_ellipsis(args, rank):
            """Expand ellipsis objects and fill in missing axes."""

            n_el = sum(1 for arg in args if arg is Ellipsis)
            if n_el > 1:
                raise IndexError("Only one ellipsis may be used.")
            elif n_el == 0 and len(args) != rank:
                args = args + (Ellipsis,)

            final_args = []
            n_args = len(args)
            for idx, arg in enumerate(args):
                if arg is Ellipsis:
                    final_args.extend((slice(None),) * (rank - n_args + 1))
                else:
                    final_args.append(arg)

            if len(final_args) > rank:
                raise IndexError("Too many indices.")

            return final_args

        def translate_slice(exp, length):
            """Given a slice object, return a 3-tuple (start, count, step)

            This is for use with the hyperslab selection routines.
""" start, stop, step = exp.start, exp.stop, exp.step if start is None: start = 0 else: start = long(start) if stop is None: stop = length else: stop = long(stop) if step is None: step = 1 else: step = long(step) if step < 1: raise IndexError("Step must be >= 1 (got %d)" % step) if stop == start: raise IndexError("Zero-length selections are not allowed") if stop < start: raise IndexError("Reverse-order selections are not allowed") if start < 0: start = length + start if stop < 0: stop = length + stop if not 0 <= start <= (length - 1): raise IndexError("Start index %s out of range (0-%d)" % (start, length - 1)) if not 1 <= stop <= length: raise IndexError("Stop index %s out of range (1-%d)" % (stop, length)) count = (stop - start) // step if (stop - start) % step != 0: count += 1 if start + count > length: raise IndexError("Selection out of bounds (%d; axis has %d)" % (start + count, length)) return start, count, step # Main code for _fancy_selection mshape = [] selection = [] if not isinstance(args, tuple): args = (args, ) args = expand_ellipsis(args, len(self.shape)) list_seen = False reorder = None for idx, (exp, length) in enumerate(zip(args, self.shape)): if isinstance(exp, slice): start, count, step = translate_slice(exp, length) selection.append((start, count, step, idx, "AND")) mshape.append(count) else: try: exp = list(exp) except TypeError: exp = [exp] # Handle scalar index as a list of length 1 mshape.append(0) # Keep track of scalar index for NumPy else: mshape.append(len(exp)) if len(exp) == 0: raise IndexError( "Empty selections are not allowed (axis %d)" % idx) elif len(exp) > 1: if list_seen: raise IndexError("Only one selection list is allowed") else: list_seen = True else: if (not isinstance(exp[0], (int, long, numpy.integer)) or (isinstance(exp[0], numpy.ndarray) and not numpy.issubdtype(exp[0].dtype, numpy.integer))): raise TypeError("Only integer coordinates allowed.") nexp = numpy.asarray(exp, dtype="i8") # Convert negative values nexp = numpy.where(nexp < 0, length + nexp, nexp) # Check whether the list is ordered or not # (only one unordered list is allowed) if not len(nexp) == len(numpy.unique(nexp)): raise IndexError( "Selection lists cannot have repeated values") neworder = nexp.argsort() if (neworder.shape != (len(exp), ) or numpy.sum( numpy.abs(neworder - numpy.arange(len(exp)))) != 0): if reorder is not None: raise IndexError( "Only one selection list can be unordered") corrected_idx = sum(1 for x in mshape if x != 0) - 1 reorder = (corrected_idx, neworder) nexp = nexp[neworder] for select_idx in xrange(len(nexp) + 1): # This crazy piece of code performs a list selection # using HDF5 hyperslabs. # For each index, perform a "NOTB" selection on every # portion of *this axis* which falls *outside* the list # selection. For this to work, the input array MUST be # monotonically increasing. if select_idx < len(nexp): validate_number(nexp[select_idx], length) if select_idx == 0: start = 0 count = nexp[0] elif select_idx == len(nexp): start = nexp[-1] + 1 count = length - start else: start = nexp[select_idx - 1] + 1 count = nexp[select_idx] - start if count > 0: selection.append((start, count, 1, idx, "NOTB")) mshape = tuple(x for x in mshape if x != 0) return selection, reorder, mshape _fancySelection = previous_api(_fancy_selection) def __getitem__(self, key): """Get a row, a range of rows or a slice from the array. The set of tokens allowed for the key is the same as that for extended slicing in Python (including the Ellipsis or ... token). 
The result is an object of the current flavor; its shape depends on
        the kind of slice used as key and the shape of the array itself.

        Furthermore, NumPy-style fancy indexing, where a list of indices
        in a certain axis is specified, is also supported.  Note that only
        one list per selection is supported right now.  Finally, NumPy-style
        point and boolean selections are supported as well.

        Examples
        --------

        ::

            array1 = array[4]                       # simple selection
            array2 = array[4:1000:2]                # slice selection
            array3 = array[1, ..., ::2, 1:4, 4:]    # general slice selection
            array4 = array[1, [1,5,10], ..., -1]    # fancy selection
            array5 = array[np.where(array[:] > 4)]  # point selection
            array6 = array[array[:] > 4]            # boolean selection

        """

        self._g_check_open()

        try:
            # First, try with a regular selection
            startl, stopl, stepl, shape = self._interpret_indexing(key)
            arr = self._read_slice(startl, stopl, stepl, shape)
        except TypeError:
            # Then, try with a point-wise selection
            try:
                coords = self._point_selection(key)
                arr = self._read_coords(coords)
            except TypeError:
                # Finally, try with a fancy selection
                selection, reorder, shape = self._fancy_selection(key)
                arr = self._read_selection(selection, reorder, shape)

        if self.flavor == "numpy" or not self._v_convert:
            return arr

        return internal_to_flavor(arr, self.flavor)

    def __setitem__(self, key, value):
        """Set a row, a range of rows or a slice in the array.

        It takes different actions depending on the type of the key
        parameter: if it is an integer, the corresponding array row is
        set to value (the value is broadcast when needed).  If key is a
        slice, the row slice determined by it is set to value (as usual,
        if the slice to be updated exceeds the actual shape of the array,
        only the values in the existing range are updated).

        If value is a multidimensional object, then its shape must be
        compatible with the shape determined by key, otherwise, a
        ValueError will be raised.

        Furthermore, NumPy-style fancy indexing, where a list of indices
        in a certain axis is specified, is also supported.  Note that
        only one list per selection is supported right now.  Finally,
        NumPy-style point and boolean selections are supported as well.

        Examples
        --------

        ::

            a1[0] = 333        # assign an integer to an Integer Array row
            a2[0] = 'b'        # assign a string to a string Array row
            a3[1:4] = 5        # broadcast 5 to slice 1:4
            a4[1:4:2] = 'xXx'  # broadcast 'xXx' to slice 1:4:2

            # General slice update (a5.shape = (4,3,2,8,5,10)).
a5[1, ..., ::2, 1:4, 4:] = numpy.arange(432).reshape((3, 2, 4, 3, 6))
            a6[1, [1,5,10], ..., -1] = arr    # fancy selection
            a7[np.where(a6[:] > 4)] = 4       # point selection + broadcast
            a8[arr > 4] = arr2                # boolean selection

        """

        self._g_check_open()

        # Create an array compliant with the specified slice
        nparr = convert_to_np_atom2(value, self.atom)
        if nparr.size == 0:
            return

        # truncate data if least_significant_digit filter is set
        # TODO: add the least_significant_digit attribute to the array on disk
        if (self.filters.least_significant_digit is not None and
                not numpy.issubdtype(nparr.dtype, int)):
            nparr = quantize(nparr, self.filters.least_significant_digit)

        try:
            startl, stopl, stepl, shape = self._interpret_indexing(key)
            self._write_slice(startl, stopl, stepl, shape, nparr)
        except TypeError:
            # Then, try with a point-wise selection
            try:
                coords = self._point_selection(key)
                self._write_coords(coords, nparr)
            except TypeError:
                selection, reorder, shape = self._fancy_selection(key)
                self._write_selection(selection, reorder, shape, nparr)

    def _check_shape(self, nparr, slice_shape):
        """Test that nparr shape is consistent with underlying object.

        If not, try creating a new nparr object, using broadcasting if
        necessary.

        """
        if nparr.shape != (slice_shape + self.atom.dtype.shape):
            # Create an array compliant with the specified shape
            narr = numpy.empty(shape=slice_shape, dtype=self.atom.dtype)
            # Assign the value to it.  It will raise a ValueError exception
            # if the objects cannot be broadcast to a single shape.
            narr[...] = nparr
            return narr
        else:
            return nparr

    _checkShape = previous_api(_check_shape)

    def _read_slice(self, startl, stopl, stepl, shape):
        """Read a slice based on `startl`, `stopl` and `stepl`."""

        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._g_read_slice(startl, stopl, stepl, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    _readSlice = previous_api(_read_slice)

    def _read_coords(self, coords):
        """Read a set of points defined by `coords`."""

        nparr = numpy.empty(dtype=self.atom.dtype, shape=len(coords))
        if len(coords) > 0:
            self._g_read_coords(coords, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    _readCoords = previous_api(_read_coords)

    def _read_selection(self, selection, reorder, shape):
        """Read a `selection`.

        Reorder if necessary.

        """
        # Create the container for the slice
        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        # Arrays that have non-zero dimensionality
        self._g_read_selection(selection, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        elif reorder is not None:
            # We need to reorder the array
            idx, neworder = reorder
            k = [slice(None)] * len(shape)
            k[idx] = neworder.argsort()
            # Apparently, a copy is not needed here, but doing it
            # for symmetry with the `_write_selection()` method.
nparr = nparr[k].copy()
        return nparr

    _readSelection = previous_api(_read_selection)

    def _write_slice(self, startl, stopl, stepl, shape, nparr):
        """Write `nparr` in a slice based on `startl`, `stopl` and `stepl`."""

        nparr = self._check_shape(nparr, tuple(shape))
        countl = ((stopl - startl - 1) // stepl) + 1
        self._g_write_slice(startl, stepl, countl, nparr)

    _writeSlice = previous_api(_write_slice)

    def _write_coords(self, coords, nparr):
        """Write `nparr` values in points defined by `coords` coordinates."""

        if len(coords) > 0:
            nparr = self._check_shape(nparr, (len(coords),))
            self._g_write_coords(coords, nparr)

    _writeCoords = previous_api(_write_coords)

    def _write_selection(self, selection, reorder, shape, nparr):
        """Write `nparr` in `selection`.

        Reorder if necessary.

        """
        nparr = self._check_shape(nparr, tuple(shape))
        # Check whether we should reorder the array
        if reorder is not None:
            idx, neworder = reorder
            k = [slice(None)] * len(shape)
            k[idx] = neworder
            # For a reason I don't understand well, we need a copy of
            # the reordered array
            nparr = nparr[k].copy()
        self._g_write_selection(selection, nparr)

    _writeSelection = previous_api(_write_selection)

    def _read(self, start, stop, step, out=None):
        """Read the array from disk without slice or flavor processing."""

        nrowstoread = len(xrange(0, stop - start, step))
        shape = list(self.shape)
        if shape:
            shape[self.maindim] = nrowstoread
        if out is None:
            arr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        else:
            bytes_required = self.rowsize * nrowstoread
            # if buffer is too small, it will segfault
            if bytes_required != out.nbytes:
                raise ValueError(('output array size invalid, got {0} bytes, '
                                  'need {1} bytes').format(out.nbytes,
                                                           bytes_required))
            if not out.flags['C_CONTIGUOUS']:
                raise ValueError('output array not C contiguous')
            arr = out
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._read_array(start, stop, step, arr)
        # data is always read in the system byteorder
        # if the out array's byteorder is different, do a byteswap
        if (out is not None and
                byteorders[arr.dtype.byteorder] != sys.byteorder):
            arr.byteswap(True)
        return arr

    def read(self, start=None, stop=None, step=None, out=None):
        """Get data in the array as an object of the current flavor.

        The start, stop and step parameters can be used to select only a
        *range of rows* in the array.  Their meanings are the same as in
        the built-in range() Python function, except that negative values
        of step are not allowed yet.  Moreover, if only start is
        specified, then stop will be set to start + 1.  If you specify
        neither start nor stop, then *all the rows* in the array are
        selected.

        The out parameter may be used to specify a NumPy array to receive
        the output data.  Note that the array must have the same size as
        the data selected with the other parameters.  Note that the
        array's datatype is not checked and no type casting is performed,
        so if it does not match the datatype on disk, the output will not
        be correct.  Also, this parameter is only valid when the array's
        flavor is set to 'numpy'.  Otherwise, a TypeError will be raised.

        When data is read from disk in NumPy format, the output will be
        in the current system's byteorder, regardless of how it is stored
        on disk.  The exception is when an output buffer is supplied, in
        which case the output will be in the byteorder of that output
        buffer.

        .. versionchanged:: 3.0
           Added the *out* parameter.
""" self._g_check_open() if out is not None and self.flavor != 'numpy': msg = ("Optional 'out' argument may only be supplied if array " "flavor is 'numpy', currently is {0}").format(self.flavor) raise TypeError(msg) (start, stop, step) = self._process_range_read(start, stop, step) arr = self._read(start, stop, step, out) return internal_to_flavor(arr, self.flavor) def _g_copy_with_stats(self, group, name, start, stop, step, title, filters, chunkshape, _log, **kwargs): """Private part of Leaf.copy() for each kind of leaf.""" # Compute the correct indices. (start, stop, step) = self._process_range_read(start, stop, step) # Get the slice of the array # (non-buffered version) if self.shape: arr = self[start:stop:step] else: arr = self[()] # Build the new Array object. Use the _atom reserved keyword # just in case the array is being copied from a native HDF5 # with atomic types different from scalars. # For details, see #275 of trac. object_ = Array(group, name, arr, title=title, _log=_log, _atom=self.atom) nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size return (object_, nbytes) _g_copyWithStats = previous_api(_g_copy_with_stats) def __repr__(self): """This provides more metainfo in addition to standard __str__""" return """%s atom := %r maindim := %r flavor := %r byteorder := %r chunkshape := %r""" % (self, self.atom, self.maindim, self.flavor, self.byteorder, self.chunkshape)
class Node(object):
    """Abstract base class for all PyTables nodes.

    This is the base class for *all* nodes in a PyTables hierarchy.  It is
    an abstract class, i.e. it may not be directly instantiated; however,
    every node in the hierarchy is an instance of this class.

    A PyTables node is always hosted in a PyTables *file*, under a *parent
    group*, at a certain *depth* in the node hierarchy.  A node knows its
    own *name* in the parent group and its own *path name* in the file.

    All the previous information is location-dependent, i.e. it may change
    when moving or renaming a node in the hierarchy.  A node also has
    location-independent information, such as its *HDF5 object identifier*
    and its *attribute set*.

    This class gathers the operations and attributes (both
    location-dependent and independent) which are common to all PyTables
    nodes, whatever their type is.  Nonetheless, due to natural naming
    restrictions, the names of all of these members start with a reserved
    prefix (see the Group class in :ref:`GroupClassDescr`).

    Sub-classes with no children (e.g. *leaf nodes*) may define new
    methods, attributes and properties to avoid natural naming
    restrictions.  For instance, _v_attrs may be shortened to attrs and
    _f_rename to rename.  However, the original methods and attributes
    should still be available.

    .. rubric:: Node attributes

    .. attribute:: _v_depth

        The depth of this node in the tree (a non-negative integer value).

    .. attribute:: _v_file

        The hosting File instance (see :ref:`FileClassDescr`).

    .. attribute:: _v_name

        The name of this node in its parent group (a string).

    .. attribute:: _v_pathname

        The path of this node in the tree (a string).

    .. attribute:: _v_objectid

        A node identifier (may change from run to run).

        .. versionchanged:: 3.0
           The *_v_objectID* attribute has been renamed into *_v_objectid*.

    """

    # This makes this class and all derived subclasses be handled by
    # MetaNode.
    __metaclass__ = MetaNode

    # By default, attributes accept Undo/Redo.
    _AttributeSet = AttributeSet

    # `_v_parent` is accessed via its file to avoid upwards references.
    def _g_getparent(self):
        (parentpath, nodename) = split_path(self._v_pathname)
        return self._v_file._get_node(parentpath)

    _v_parent = property(
        _g_getparent, None, None, ("The parent :class:`Group` instance"))

    # '_v_attrs' is defined as a lazy read-only attribute.
    # This saves 0.7s/3.8s.
    @lazyattr
    def _v_attrs(self):
        """The associated `AttributeSet` instance.

        See Also
        --------
        tables.attributeset.AttributeSet : container for the HDF5 attributes

        """
        return self._AttributeSet(self)

    # '_v_title' is a direct read-write shorthand for the 'TITLE' attribute
    # with the empty string as a default value.
    def _g_gettitle(self):
        if hasattr(self._v_attrs, 'TITLE'):
            return self._v_attrs.TITLE
        else:
            return ''

    def _g_settitle(self, title):
        self._v_attrs.TITLE = title

    _v_title = property(_g_gettitle, _g_settitle, None,
                        ("A description of this node.  A shorthand for "
                         "TITLE attribute."))

    # This may be looked up by ``__del__`` when ``__init__`` doesn't get
    # to be called.  See ticket #144 for more info.
    _v_isopen = False
    """Whether this node is open or not."""

    _v_objectId = previous_api_property('_v_objectid')
    _v_maxTreeDepth = previous_api_property('_v_maxtreedepth')

    # The ``_log`` argument is only meant to be used by
    # ``_g_copy_as_child()`` to avoid logging the creation of children
    # nodes of a copied sub-tree.
    def __init__(self, parentnode, name, _log=True):
        # Remember to assign these values in the root group constructor
        # as it does not use this method implementation!
# if the parent node is a softlink, dereference it if isinstance(parentnode, class_name_dict['SoftLink']): parentnode = parentnode.dereference() self._v_file = None """The hosting File instance (see :ref:`FileClassDescr`).""" self._v_isopen = False """Whether this node is open or not.""" self._v_pathname = None """The path of this node in the tree (a string).""" self._v_name = None """The name of this node in its parent group (a string).""" self._v_depth = None """The depth of this node in the tree (an non-negative integer value). """ self._v_maxtreedepth = parentnode._v_file.params['MAX_TREE_DEPTH'] """Maximum tree depth before warning the user. .. versionchanged:: 3.0 Renamed into *_v_maxtreedepth* from *_v_maxTreeDepth*. """ self._v__deleting = False """Is the node being deleted?""" self._v_objectid = None """A node identifier (may change from run to run). .. versionchanged:: 3.0 The *_v_objectID* attribute has been renamed into *_v_objectid*. """ validate = new = self._v_new # set by subclass constructor # Is the parent node a group? Is it open? self._g_check_group(parentnode) parentnode._g_check_open() file_ = parentnode._v_file # Will the file be able to host a new node? if new: file_._check_writable() # Bind to the parent node and set location-dependent information. if new: # Only new nodes need to be referenced. # Opened nodes are already known by their parent group. parentnode._g_refnode(self, name, validate) self._g_set_location(parentnode, name) try: # hdf5extension operations: # Update node attributes. self._g_new(parentnode, name, init=True) # Create or open the node and get its object ID. if new: self._v_objectid = self._g_create() else: self._v_objectid = self._g_open() # The node *has* been created, log that. if new and _log and file_.is_undo_enabled(): self._g_log_create() # This allows extra operations after creating the node. self._g_post_init_hook() except: # If anything happens, the node must be closed # to undo every possible registration made so far. # We do *not* rely on ``__del__()`` doing it later, # since it might never be called anyway. self._f_close() raise def _g_log_create(self): self._v_file._log('CREATE', self._v_pathname) _g_logCreate = previous_api(_g_log_create) def __del__(self): # Closed `Node` instances can not be killed and revived. # Instead, accessing a closed and deleted (from memory, not # disk) one yields a *new*, open `Node` instance. This is # because of two reasons: # # 1. Predictability. After closing a `Node` and deleting it, # only one thing can happen when accessing it again: a new, # open `Node` instance is returned. If closed nodes could be # revived, one could get either a closed or an open `Node`. # # 2. Ease of use. If the user wants to access a closed node # again, the only condition would be that no references to # the `Node` instance were left. If closed nodes could be # revived, the user would also need to force the closed # `Node` out of memory, which is not a trivial task. # if not self._v_isopen: return # the node is already closed or not initialized self._v__deleting = True # If we get here, the `Node` is still open. try: node_manager = self._v_file._node_manager node_manager.drop_node(self, check_unregistered=False) finally: # At this point the node can still be open if there is still some # alive reference around (e.g. if the __del__ method is called # explicitly by the user). 
if self._v_isopen: self._v__deleting = True self._f_close() def _g_pre_kill_hook(self): """Code to be called before killing the node.""" pass _g_preKillHook = previous_api(_g_pre_kill_hook) def _g_create(self): """Create a new HDF5 node and return its object identifier.""" raise NotImplementedError def _g_open(self): """Open an existing HDF5 node and return its object identifier.""" raise NotImplementedError def _g_check_open(self): """Check that the node is open. If the node is closed, a `ClosedNodeError` is raised. """ if not self._v_isopen: raise ClosedNodeError("the node object is closed") assert self._v_file.isopen, "found an open node in a closed file" _g_checkOpen = previous_api(_g_check_open) def _g_set_location(self, parentnode, name): """Set location-dependent attributes. Sets the location-dependent attributes of this node to reflect that it is placed under the specified `parentnode`, with the specified `name`. This also triggers the insertion of file references to this node. If the maximum recommended tree depth is exceeded, a `PerformanceWarning` is issued. """ file_ = parentnode._v_file parentdepth = parentnode._v_depth self._v_file = file_ self._v_isopen = True root_uep = file_.root_uep if name.startswith(root_uep): # This has been called from File._get_node() assert parentdepth == 0 if root_uep == "/": self._v_pathname = name else: self._v_pathname = name[len(root_uep):] _, self._v_name = split_path(name) self._v_depth = name.count("/") - root_uep.count("/") + 1 else: # If we enter here is because this has been called elsewhere self._v_name = name self._v_pathname = join_path(parentnode._v_pathname, name) self._v_depth = parentdepth + 1 # Check if the node is too deep in the tree. if parentdepth >= self._v_maxtreedepth: warnings.warn( """\ node ``%s`` is exceeding the recommended maximum depth (%d);\ be ready to see PyTables asking for *lots* of memory and possibly slow I/O""" % (self._v_pathname, self._v_maxtreedepth), PerformanceWarning) if self._v_pathname != '/': file_._node_manager.cache_node(self, self._v_pathname) _g_setLocation = previous_api(_g_set_location) def _g_update_location(self, newparentpath): """Update location-dependent attributes. Updates location data when an ancestor node has changed its location in the hierarchy to `newparentpath`. In fact, this method is expected to be called by an ancestor of this node. This also triggers the update of file references to this node. If the maximum recommended node depth is exceeded, a `PerformanceWarning` is issued. This warning is assured to be unique. """ oldpath = self._v_pathname newpath = join_path(newparentpath, self._v_name) newdepth = newpath.count('/') self._v_pathname = newpath self._v_depth = newdepth # Check if the node is too deep in the tree. if newdepth > self._v_maxtreedepth: warnings.warn( """\ moved descendent node is exceeding the recommended maximum depth (%d);\ be ready to see PyTables asking for *lots* of memory and possibly slow I/O""" % (self._v_maxtreedepth, ), PerformanceWarning) node_manager = self._v_file._node_manager node_manager.rename_node(oldpath, newpath) # Tell dependent objects about the new location of this node. self._g_update_dependent() _g_updateLocation = previous_api(_g_update_location) def _g_del_location(self): """Clear location-dependent attributes. This also triggers the removal of file references to this node. 
""" node_manager = self._v_file._node_manager pathname = self._v_pathname if not self._v__deleting: node_manager.drop_from_cache(pathname) # Note: node_manager.drop_node do not removes the node form the # registry if it is still open node_manager.registry.pop(pathname, None) self._v_file = None self._v_isopen = False self._v_pathname = None self._v_name = None self._v_depth = None _g_delLocation = previous_api(_g_del_location) def _g_post_init_hook(self): """Code to be run after node creation and before creation logging.""" pass _g_postInitHook = previous_api(_g_post_init_hook) def _g_update_dependent(self): """Update dependent objects after a location change. All dependent objects (but not nodes!) referencing this node must be updated here. """ if '_v_attrs' in self.__dict__: self._v_attrs._g_update_node_location(self) _g_updateDependent = previous_api(_g_update_dependent) def _f_close(self): """Close this node in the tree. This releases all resources held by the node, so it should not be used again. On nodes with data, it may be flushed to disk. You should not need to close nodes manually because they are automatically opened/closed when they are loaded/evicted from the integrated LRU cache. """ # After calling ``_f_close()``, two conditions are met: # # 1. The node object is detached from the tree. # 2. *Every* attribute of the node is removed. # # Thus, cleanup operations used in ``_f_close()`` in sub-classes # must be run *before* calling the method in the superclass. if not self._v_isopen: return # the node is already closed myDict = self.__dict__ # Close the associated `AttributeSet` # only if it has already been placed in the object's dictionary. if '_v_attrs' in myDict: self._v_attrs._g_close() # Detach the node from the tree if necessary. self._g_del_location() # Finally, clear all remaining attributes from the object. myDict.clear() # Just add a final flag to signal that the node is closed: self._v_isopen = False def _g_remove(self, recursive, force): """Remove this node from the hierarchy. If the node has children, recursive removal must be stated by giving `recursive` a true value; otherwise, a `NodeError` will be raised. If `force` is set to true, the node will be removed no matter it has children or not (useful for deleting hard links). It does not log the change. """ # Remove the node from the PyTables hierarchy. parent = self._v_parent parent._g_unrefnode(self._v_name) # Close the node itself. self._f_close() # hdf5extension operations: # Remove the node from the HDF5 hierarchy. self._g_delete(parent) def _f_remove(self, recursive=False, force=False): """Remove this node from the hierarchy. If the node has children, recursive removal must be stated by giving recursive a true value; otherwise, a NodeError will be raised. If the node is a link to a Group object, and you are sure that you want to delete it, you can do this by setting the force flag to true. """ self._g_check_open() file_ = self._v_file file_._check_writable() if file_.is_undo_enabled(): self._g_remove_and_log(recursive, force) else: self._g_remove(recursive, force) def _g_remove_and_log(self, recursive, force): file_ = self._v_file oldpathname = self._v_pathname # Log *before* moving to use the right shadow name. file_._log('REMOVE', oldpathname) move_to_shadow(file_, oldpathname) _g_removeAndLog = previous_api(_g_remove_and_log) def _g_move(self, newparent, newname): """Move this node in the hierarchy. Moves the node into the given `newparent`, with the given `newname`. It does not log the change. 
""" oldparent = self._v_parent oldname = self._v_name oldpathname = self._v_pathname # to move the HDF5 node # Try to insert the node into the new parent. newparent._g_refnode(self, newname) # Remove the node from the new parent. oldparent._g_unrefnode(oldname) # Remove location information for this node. self._g_del_location() # Set new location information for this node. self._g_set_location(newparent, newname) # hdf5extension operations: # Update node attributes. self._g_new(newparent, self._v_name, init=False) # Move the node. # self._v_parent._g_move_node(oldpathname, self._v_pathname) self._v_parent._g_move_node(oldparent._v_objectid, oldname, newparent._v_objectid, newname, oldpathname, self._v_pathname) # Tell dependent objects about the new location of this node. self._g_update_dependent() def _f_rename(self, newname, overwrite=False): """Rename this node in place. Changes the name of a node to *newname* (a string). If a node with the same newname already exists and overwrite is true, recursively remove it before renaming. """ self._f_move(newname=newname, overwrite=overwrite) def _f_move(self, newparent=None, newname=None, overwrite=False, createparents=False): """Move or rename this node. Moves a node into a new parent group, or changes the name of the node. newparent can be a Group object (see :ref:`GroupClassDescr`) or a pathname in string form. If it is not specified or None, the current parent group is chosen as the new parent. newname must be a string with a new name. If it is not specified or None, the current name is chosen as the new name. If createparents is true, the needed groups for the given new parent group path to exist will be created. Moving a node across databases is not allowed, nor it is moving a node *into* itself. These result in a NodeError. However, moving a node *over* itself is allowed and simply does nothing. Moving over another existing node is similarly not allowed, unless the optional overwrite argument is true, in which case that node is recursively removed before moving. Usually, only the first argument will be used, effectively moving the node to a new location without changing its name. Using only the second argument is equivalent to renaming the node in place. """ self._g_check_open() file_ = self._v_file oldparent = self._v_parent oldname = self._v_name # Set default arguments. if newparent is None and newname is None: raise NodeError("you should specify at least " "a ``newparent`` or a ``newname`` parameter") if newparent is None: newparent = oldparent if newname is None: newname = oldname # Get destination location. if hasattr(newparent, '_v_file'): # from node newfile = newparent._v_file newpath = newparent._v_pathname elif hasattr(newparent, 'startswith'): # from path newfile = file_ newpath = newparent else: raise TypeError("new parent is not a node nor a path: %r" % (newparent, )) # Validity checks on arguments. # Is it in the same file? if newfile is not file_: raise NodeError("nodes can not be moved across databases; " "please make a copy of the node") # The movement always fails if the hosting file can not be modified. file_._check_writable() # Moving over itself? oldpath = oldparent._v_pathname if newpath == oldpath and newname == oldname: # This is equivalent to renaming the node to its current name, # and it does not change the referenced object, # so it is an allowed no-op. return # Moving into itself? 
self._g_check_not_contains(newpath)

        # Note that the previous checks allow us to go ahead and create
        # the parent groups if `createparents` is true.  `newparent` is
        # used instead of `newpath` to avoid accepting `Node` objects
        # when `createparents` is true.
        newparent = file_._get_or_create_path(newparent, createparents)
        self._g_check_group(newparent)  # Is it a group?

        # Moving over an existing node?
        self._g_maybe_remove(newparent, newname, overwrite)

        # Move the node.
        oldpathname = self._v_pathname
        self._g_move(newparent, newname)

        # Log the change.
        if file_.is_undo_enabled():
            self._g_log_move(oldpathname)

    def _g_log_move(self, oldpathname):
        self._v_file._log('MOVE', oldpathname, self._v_pathname)

    _g_logMove = previous_api(_g_log_move)

    def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs):
        """Copy this node and return the new one.

        Creates and returns a copy of the node in the given `newparent`,
        with the given `newname`.  If `recursive` copy is stated, all
        descendants are copied as well.  Additional keyword arguments may
        affect the way that the copy is made.  Unknown arguments must be
        ignored.  On recursive copies, all keyword arguments must be
        passed on to the children invocation of this method.

        If `_log` is false, the change is not logged.  This is *only*
        intended to be used by ``_g_copy_as_child()`` as a means of
        optimising sub-tree copies.

        """
        raise NotImplementedError

    def _g_copy_as_child(self, newparent, **kwargs):
        """Copy this node as a child of another group.

        Copies just this node into `newparent`, not recursing children
        nor overwriting nodes nor logging the copy.  This is intended
        to be used when copying whole sub-trees.

        """
        return self._g_copy(newparent, self._v_name,
                            recursive=False, _log=False, **kwargs)

    _g_copyAsChild = previous_api(_g_copy_as_child)

    def _f_copy(self, newparent=None, newname=None,
                overwrite=False, recursive=False, createparents=False,
                **kwargs):
        """Copy this node and return the new node.

        Creates and returns a copy of the node, maybe in a different
        place in the hierarchy.  newparent can be a Group object (see
        :ref:`GroupClassDescr`) or a pathname in string form.  If it is
        not specified or None, the current parent group is chosen as the
        new parent.  newname must be a string with a new name.  If it is
        not specified or None, the current name is chosen as the new
        name.  If recursive copy is stated, all descendants are copied
        as well.  If createparents is true, the needed groups for the
        given new parent group path will be created.

        Copying a node across databases is supported but can not be
        undone.  Copying a node over itself is not allowed, nor is
        recursively copying a node into itself.  These result in a
        NodeError.  Copying over another existing node is similarly not
        allowed, unless the optional overwrite argument is true, in
        which case that node is recursively removed before copying.

        Additional keyword arguments may be passed to customize the
        copying process.  For instance, title and filters may be
        changed, user attributes may or may not be copied, data may be
        sub-sampled, stats may be collected, etc.  See the documentation
        for the particular node type.

        Using only the first argument is equivalent to copying the node
        to a new location without changing its name.  Using only the
        second argument is equivalent to making a copy of the node in
        the same group.

        """
        self._g_check_open()
        srcfile = self._v_file
        srcparent = self._v_parent
        srcname = self._v_name

        dstparent = newparent
        dstname = newname

        # Set default arguments.
if dstparent is None and dstname is None: raise NodeError("you should specify at least " "a ``newparent`` or a ``newname`` parameter") if dstparent is None: dstparent = srcparent if dstname is None: dstname = srcname # Get destination location. if hasattr(dstparent, '_v_file'): # from node dstfile = dstparent._v_file dstpath = dstparent._v_pathname elif hasattr(dstparent, 'startswith'): # from path dstfile = srcfile dstpath = dstparent else: raise TypeError("new parent is not a node nor a path: %r" % (dstparent, )) # Validity checks on arguments. if dstfile is srcfile: # Copying over itself? srcpath = srcparent._v_pathname if dstpath == srcpath and dstname == srcname: raise NodeError( "source and destination nodes are the same node: ``%s``" % self._v_pathname) # Recursively copying into itself? if recursive: self._g_check_not_contains(dstpath) # Note that the previous checks allow us to go ahead and create # the parent groups if `createparents` is true. `dstParent` is # used instead of `dstPath` because it may be in other file, and # to avoid accepting `Node` objects when `createparents` is # true. dstparent = srcfile._get_or_create_path(dstparent, createparents) self._g_check_group(dstparent) # Is it a group? # Copying to another file with undo enabled? if dstfile is not srcfile and srcfile.is_undo_enabled(): warnings.warn( "copying across databases can not be undone " "nor redone from this database", UndoRedoWarning) # Copying over an existing node? self._g_maybe_remove(dstparent, dstname, overwrite) # Copy the node. # The constructor of the new node takes care of logging. return self._g_copy(dstparent, dstname, recursive, **kwargs) def _f_isvisible(self): """Is this node visible?""" self._g_check_open() return isvisiblepath(self._v_pathname) _f_isVisible = previous_api(_f_isvisible) def _g_check_group(self, node): # Node must be defined in order to define a Group. # However, we need to know Group here. # Using class_name_dict avoids a circular import. if not isinstance(node, class_name_dict['Node']): raise TypeError("new parent is not a registered node: %s" % node._v_pathname) if not isinstance(node, class_name_dict['Group']): raise TypeError("new parent node ``%s`` is not a group" % node._v_pathname) _g_checkGroup = previous_api(_g_check_group) def _g_check_not_contains(self, pathname): # The not-a-TARDIS test. ;) mypathname = self._v_pathname if (mypathname == '/' # all nodes fall below the root group or pathname == mypathname or pathname.startswith(mypathname + '/')): raise NodeError("can not move or recursively copy node ``%s`` " "into itself" % mypathname) _g_checkNotContains = previous_api(_g_check_not_contains) def _g_maybe_remove(self, parent, name, overwrite): if name in parent: if not overwrite: raise NodeError("""\ destination group ``%s`` already has a node named ``%s``; \ you may want to use the ``overwrite`` argument""" % (parent._v_pathname, name)) parent._f_get_child(name)._f_remove(True) _g_maybeRemove = previous_api(_g_maybe_remove) def _g_check_name(self, name): """Check validity of name for this particular kind of node. This is invoked once the standard HDF5 and natural naming checks have successfully passed. """ if name.startswith('_i_'): # This is reserved for table index groups. raise ValueError( "node name starts with reserved prefix ``_i_``: %s" % name) _g_checkName = previous_api(_g_check_name) # <attribute handling> def _f_getattr(self, name): """Get a PyTables attribute from this node. If the named attribute does not exist, an AttributeError is raised. 
""" return getattr(self._v_attrs, name) _f_getAttr = previous_api(_f_getattr) def _f_setattr(self, name, value): """Set a PyTables attribute for this node. If the node already has a large number of attributes, a PerformanceWarning is issued. """ setattr(self._v_attrs, name, value) _f_setAttr = previous_api(_f_setattr) def _f_delattr(self, name): """Delete a PyTables attribute from this node. If the named attribute does not exist, an AttributeError is raised. """ delattr(self._v_attrs, name) _f_delAttr = previous_api(_f_delattr)
class Description(object):
    """This class represents descriptions of the structure of tables.

    An instance of this class is automatically bound to Table (see
    :ref:`TableClassDescr`) objects when they are created.  It provides a
    browseable representation of the structure of the table, made of
    non-nested (Col - see :ref:`ColClassDescr`) and nested (Description)
    columns.

    Column definitions under a description can be accessed as attributes
    of it (*natural naming*).  For instance, if table.description is a
    Description instance with a column named col1 under it, the latter
    can be accessed as table.description.col1.  If col1 is nested and
    contains a col2 column, this can be accessed as
    table.description.col1.col2.  Because of natural naming, the names of
    members start with special prefixes, like in the Group class (see
    :ref:`GroupClassDescr`).

    .. rubric:: Description attributes

    .. attribute:: _v_colobjects

        A dictionary mapping the names of the columns hanging directly
        from the associated table or nested column to their respective
        descriptions (Col - see :ref:`ColClassDescr` or Description -
        see :ref:`DescriptionClassDescr` instances).

        .. versionchanged:: 3.0
           The *_v_colObjects* attribute has been renamed into
           *_v_colobjects*.

    .. attribute:: _v_dflts

        A dictionary mapping the names of non-nested columns hanging
        directly from the associated table or nested column to their
        respective default values.

    .. attribute:: _v_dtype

        The NumPy type which reflects the structure of this table or
        nested column.  You can use this as the dtype argument of NumPy
        array factories.

    .. attribute:: _v_dtypes

        A dictionary mapping the names of non-nested columns hanging
        directly from the associated table or nested column to their
        respective NumPy types.

    .. attribute:: _v_is_nested

        Whether the associated table or nested column contains further
        nested columns or not.

    .. attribute:: _v_itemsize

        The size in bytes of an item in this table or nested column.

    .. attribute:: _v_name

        The name of this description group.  The name of the root group
        is '/'.

    .. attribute:: _v_names

        A list of the names of the columns hanging directly from the
        associated table or nested column.  The order of the names
        matches the order of their respective columns in the containing
        table.

    .. attribute:: _v_nested_descr

        A nested list of pairs of (name, format) tuples for all the
        columns under this table or nested column.  You can use this as
        the dtype and descr arguments of NumPy array factories.

        .. versionchanged:: 3.0
           The *_v_nestedDescr* attribute has been renamed into
           *_v_nested_descr*.

    .. attribute:: _v_nested_formats

        A nested list of the NumPy string formats (and shapes) of all
        the columns under this table or nested column.  You can use this
        as the formats argument of NumPy array factories.

        .. versionchanged:: 3.0
           The *_v_nestedFormats* attribute has been renamed into
           *_v_nested_formats*.

    .. attribute:: _v_nestedlvl

        The level of the associated table or nested column in the nested
        datatype.

    .. attribute:: _v_nested_names

        A nested list of the names of all the columns under this table
        or nested column.  You can use this as the names argument of
        NumPy array factories.

        .. versionchanged:: 3.0
           The *_v_nestedNames* attribute has been renamed into
           *_v_nested_names*.

    .. attribute:: _v_pathname

        Pathname of the table or nested column.

    .. attribute:: _v_pathnames

        A list of the pathnames of all the columns under this table or
        nested column (in preorder).  If it does not contain nested
        columns, this is exactly the same as the
        :attr:`Description._v_names` attribute.

    .. attribute:: _v_types

        A dictionary mapping the names of non-nested columns hanging
        directly from the associated table or nested column to their
        respective PyTables types.

    """

    _v_colObjects = previous_api_property('_v_colobjects')
    _v_nestedFormats = previous_api_property('_v_nested_formats')
    _v_nestedNames = previous_api_property('_v_nested_names')
    _v_nestedDescr = previous_api_property('_v_nested_descr')

    def __init__(self, classdict, nestedlvl=-1, validate=True):

        if not classdict:
            raise ValueError("cannot create an empty data type")

        # Do a shallow copy of classdict just in case this is going to
        # be shared by other instances
        newdict = self.__dict__
        newdict["_v_name"] = "/"   # The name for root descriptor
        newdict["_v_names"] = []
        newdict["_v_dtypes"] = {}
        newdict["_v_types"] = {}
        newdict["_v_dflts"] = {}
        newdict["_v_colobjects"] = {}
        newdict["_v_is_nested"] = False
        nestedFormats = []
        nestedDType = []

        if not hasattr(newdict, "_v_nestedlvl"):
            newdict["_v_nestedlvl"] = nestedlvl + 1

        cols_with_pos = []  # column (position, name) pairs
        cols_no_pos = []    # just column names

        # Check for special variables and convert column descriptions
        for (name, descr) in classdict.iteritems():
            if name.startswith('_v_'):
                if name in newdict:
                    # print("Warning!")
                    # special methods &c: copy to newdict, warn about
                    # conflicts
                    warnings.warn("Can't set attr %r in description class %r"
                                  % (name, self))
                else:
                    # print("Special variable!-->", name, classdict[name])
                    newdict[name] = descr
                continue  # This variable is not needed anymore

            columns = None
            if (type(descr) == type(IsDescription) and
                    issubclass(descr, IsDescription)):
                # print("Nested object (type I)-->", name)
                columns = descr().columns
            elif (type(descr.__class__) == type(IsDescription) and
                    issubclass(descr.__class__, IsDescription)):
                # print("Nested object (type II)-->", name)
                columns = descr.columns
            elif isinstance(descr, dict):
                # print("Nested object (type III)-->", name)
                columns = descr
            else:
                # print("Nested object (type IV)-->", name)
                descr = copy.copy(descr)
            # The copies above and below ensure that the structures
            # provided by the user will remain unchanged even if we
            # tamper with the values of ``_v_pos`` here.
            if columns is not None:
                descr = Description(copy.copy(columns), self._v_nestedlvl)
            classdict[name] = descr

            pos = getattr(descr, '_v_pos', None)
            if pos is None:
                cols_no_pos.append(name)
            else:
                cols_with_pos.append((pos, name))

        # Sort field names:
        #
        # 1. Fields with explicit positions, according to their
        #    positions (and their names if coincident).
        # 2. Fields with no position, in alphabetical order.
        cols_with_pos.sort()
        cols_no_pos.sort()
        keys = [name for (pos, name) in cols_with_pos] + cols_no_pos

        pos = 0
        # Get properties for compound types
        for k in keys:
            if validate:
                # Check for key name validity
                check_name_validity(k)
            # Class variables
            object = classdict[k]
            newdict[k] = object  # To allow natural naming
            if not (isinstance(object, Col) or
                    isinstance(object, Description)):
                raise TypeError('Passing an incorrect value to a table '
                                'column.  Expected a Col (or subclass) '
                                'instance and got: "%s".  Please make use '
                                'of the Col(), or descendant, constructor '
                                'to properly initialize columns.'
% object)
            object._v_pos = pos  # Set the position of this object
            object._v_parent = self  # The parent description
            pos += 1
            newdict['_v_colobjects'][k] = object
            newdict['_v_names'].append(k)
            object.__dict__['_v_name'] = k

            if not isinstance(k, str):
                # numpy only accepts "str" for field names
                if sys.version_info[0] < 3:
                    # Python 2.x: unicode --> str
                    kk = k.encode()  # use the default encoding
                else:
                    # Python 3.x: bytes --> str (unicode)
                    kk = k.decode()
            else:
                kk = k

            if isinstance(object, Col):
                dtype = object.dtype
                newdict['_v_dtypes'][k] = dtype
                newdict['_v_types'][k] = object.type
                newdict['_v_dflts'][k] = object.dflt
                nestedFormats.append(object.recarrtype)
                baserecarrtype = dtype.base.str[1:]
                nestedDType.append((kk, baserecarrtype, dtype.shape))
            else:  # A description
                nestedFormats.append(object._v_nested_formats)
                nestedDType.append((kk, object._v_dtype))

        # Assign the format list to _v_nested_formats
        newdict['_v_nested_formats'] = nestedFormats
        newdict['_v_dtype'] = numpy.dtype(nestedDType)
        # _v_itemsize is derived from the _v_dtype that already computes this
        newdict['_v_itemsize'] = newdict['_v_dtype'].itemsize

        if self._v_nestedlvl == 0:
            # Get recursively nested _v_nested_names and _v_nested_descr
            # attrs
            self._g_set_nested_names_descr()
            # Get pathnames for nested groups
            self._g_set_path_names()
            # Check whether _v_byteorder has been used and issue an error
            if hasattr(self, "_v_byteorder"):
                raise ValueError(
                    "Using a ``_v_byteorder`` in the description is "
                    "obsolete.  Use the byteorder parameter in the "
                    "constructor instead.")

    def _g_set_nested_names_descr(self):
        """Computes the nested names and descriptions for nested
        datatypes."""

        names = self._v_names
        fmts = self._v_nested_formats
        self._v_nested_names = names[:]  # Important to do a copy!
        self._v_nested_descr = [(names[i], fmts[i])
                                for i in range(len(names))]
        for i in range(len(names)):
            name = names[i]
            new_object = self._v_colobjects[name]
            if isinstance(new_object, Description):
                new_object._g_set_nested_names_descr()
                # replace the column nested name by a correct tuple
                self._v_nested_names[i] = (name, new_object._v_nested_names)
                self._v_nested_descr[i] = (name, new_object._v_nested_descr)
                # set the _v_is_nested flag
                self._v_is_nested = True

    _g_setNestedNamesDescr = previous_api(_g_set_nested_names_descr)

    def _g_set_path_names(self):
        """Compute the pathnames for arbitrary nested descriptions.

        This method sets the ``_v_pathname`` and ``_v_pathnames``
        attributes of all the elements (both descriptions and columns)
        in this nested description.

        """

        def get_cols_in_order(description):
            return [description._v_colobjects[colname]
                    for colname in description._v_names]

        def join_paths(path1, path2):
            if not path1:
                return path2
            return '%s/%s' % (path1, path2)

        # The top of the stack always has a nested description
        # and a list of its child columns
        # (be they nested ``Description`` or non-nested ``Col`` objects).
        # In the end, the list contains only a list of column paths
        # under this one.
        #
        # For instance, given this top of the stack::
        #
        #   (<Description X>, [<Column A>, <Column B>])
        #
        # After computing the rest of the stack, the top is::
        #
        #   (<Description X>, ['a', 'a/m', 'a/n', ... , 'b', ...])

        stack = []

        # We start by pushing the top-level description
        # and its child columns.
        self._v_pathname = ''
        stack.append((self, get_cols_in_order(self)))

        while stack:
            desc, cols = stack.pop()
            head = cols[0]  # What's the first child in the list?

            if isinstance(head, Description):
                # A nested description.  We remove it from the list and
                # push it with its child columns.  This will be the next
                # handled description.
                head._v_pathname = join_paths(desc._v_pathname, head._v_name)
                stack.append((desc, cols[1:]))  # alter the top
                stack.append((head, get_cols_in_order(head)))  # new top
            elif isinstance(head, Col):
                # A non-nested column.  We simply remove it from the
                # list and append its name to it.
                head._v_pathname = join_paths(desc._v_pathname, head._v_name)
                cols.append(head._v_name)  # alter the top
                stack.append((desc, cols[1:]))  # alter the top
            else:
                # Since paths and names are appended *to the end* of
                # children lists, a string signals that no more children
                # remain to be processed, so we are done with the
                # description at the top of the stack.
                assert isinstance(head, basestring)
                # Assign the computed set of descendent column paths.
                desc._v_pathnames = cols
                if len(stack) > 0:
                    # Compute the paths with respect to the parent node
                    # (including the path of the current description)
                    # and append them to its list.
                    descName = desc._v_name
                    colPaths = [join_paths(descName, path) for path in cols]
                    colPaths.insert(0, descName)
                    parentCols = stack[-1][1]
                    parentCols.extend(colPaths)
                # (Nothing is pushed, we are done with this description.)

    _g_setPathNames = previous_api(_g_set_path_names)

    def _f_walk(self, type='All'):
        """Iterate over nested columns.

        If type is 'All' (the default), all column description objects
        (Col and Description instances) are yielded in top-to-bottom
        order (preorder).

        If type is 'Col' or 'Description', only column descriptions of
        that type are yielded.

        """
        if type not in ["All", "Col", "Description"]:
            raise ValueError("""\
type can only take the parameters 'All', 'Col' or 'Description'.""")

        stack = [self]
        while stack:
            object = stack.pop(0)  # pop at the front to ensure the order
            if type in ["All", "Description"]:
                yield object  # yield description
            names = object._v_names
            for i in range(len(names)):
                new_object = object._v_colobjects[names[i]]
                if isinstance(new_object, Description):
                    stack.append(new_object)
                else:
                    if type in ["All", "Col"]:
                        yield new_object  # yield column

    def __repr__(self):
        """Gives a detailed Description column representation."""

        rep = ['%s\"%s\": %r' % ("  " * self._v_nestedlvl,
                                 k, self._v_colobjects[k])
               for k in self._v_names]
        return '{\n  %s}' % (',\n  '.join(rep))

    def __str__(self):
        """Gives a brief Description representation."""

        return 'Description(%s)' % self._v_nested_descr
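# A brief, illustrative sketch (not part of the library) of how a table
# description is usually declared and then introspected through the
# ``Description`` instance bound to the table; the file name and column
# layout below are hypothetical::
#
#     import tables
#
#     class Particle(tables.IsDescription):
#         name = tables.StringCol(16, pos=1)
#         grid = tables.Int32Col(pos=2)
#
#         class position(tables.IsDescription):  # a nested column
#             x = tables.Float64Col()
#             y = tables.Float64Col()
#
#     fileh = tables.open_file('description-demo.h5', mode='w')
#     table = fileh.create_table('/', 'particles', Particle)
#
#     desc = table.description      # a Description instance
#     print(desc._v_names)          # ['name', 'grid', 'position']
#     print(desc._v_is_nested)      # True
#     print(desc.position.x)        # natural naming on nested columns
#     for col in desc._f_walk(type='Col'):
#         print(col)                # preorder walk over leaf columns
#
#     fileh.close()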
class Group(hdf5extension.Group, Node): """Basic PyTables grouping structure. Instances of this class are grouping structures containing *child* instances of zero or more groups or leaves, together with supporting metadata. Each group has exactly one *parent* group. Working with groups and leaves is similar in many ways to working with directories and files, respectively, in a Unix filesystem. As with Unix directories and files, objects in the object tree are often described by giving their full (or absolute) path names. This full path can be specified either as a string (like in '/group1/group2') or as a complete object path written in *natural naming* schema (like in file.root.group1.group2). A collateral effect of the *natural naming* schema is that the names of members in the Group class and its instances must be carefully chosen to avoid colliding with existing children node names. For this reason and to avoid polluting the children namespace all members in a Group start with some reserved prefix, like _f_ (for public methods), _g_ (for private ones), _v_ (for instance variables) or _c_ (for class variables). Any attempt to create a new child node whose name starts with one of these prefixes will raise a ValueError exception. Another effect of natural naming is that children named after Python keywords or having names not valid as Python identifiers (e.g. class, $a or 44) can not be accessed using the node.child syntax. You will be forced to use node._f_get_child(child) to access them (which is recommended for programmatic accesses). You will also need to use _f_get_child() to access an existing child node if you set a Python attribute in the Group with the same name as that node (you will get a NaturalNameWarning when doing this). Parameters ---------- parentnode The parent :class:`Group` object. .. versionchanged:: 3.0 Renamed from *parentNode* to *parentnode* name : str The name of this node in its parent group. title The title for this group new If this group is new or has to be read from disk filters : Filters A Filters instance Notes ----- The following documentation includes methods that are automatically called when a Group instance is accessed in a special way. For instance, this class defines the __setattr__, __getattr__, and __delattr__ methods, and they set, get and delete *ordinary Python attributes* as normally intended. In addition to that, __getattr__ allows getting *child nodes* by their name for the sake of easy interaction on the command line, as long as there is no Python attribute with the same name. Groups also allow the interactive completion (when using readline) of the names of child nodes. For instance:: # get a Python attribute nchild = group._v_nchildren # Add a Table child called 'table' under 'group'. h5file.create_table(group, 'table', myDescription) table = group.table # get the table child instance group.table = 'foo' # set a Python attribute # (PyTables warns you here about using the name of a child node.) foo = group.table # get a Python attribute del group.table # delete a Python attribute table = group.table # get the table child instance again .. rubric:: Group attributes The following instance variables are provided in addition to those in Node (see :ref:`NodeClassDescr`): .. attribute:: _v_children Dictionary with all nodes hanging from this group. .. attribute:: _v_groups Dictionary with all groups hanging from this group. .. attribute:: _v_hidden Dictionary with all hidden nodes hanging from this group. .. 
attribute:: _v_leaves Dictionary with all leaves hanging from this group. .. attribute:: _v_links Dictionary with all links hanging from this group. .. attribute:: _v_unknown Dictionary with all unknown nodes hanging from this group. """ # Class identifier. _c_classid = 'GROUP' _c_classId = previous_api_property('_c_classid') # Children containers that should be loaded only in a lazy way. # These are documented in the ``Group._g_add_children_names`` method. _c_lazy_children_attrs = ('__members__', '_v_children', '_v_groups', '_v_leaves', '_v_links', '_v_unknown', '_v_hidden') # `_v_nchildren` is a direct read-only shorthand # for the number of *visible* children in a group. def _g_getnchildren(self): return len(self._v_children) _v_nchildren = property(_g_getnchildren, None, None, "The number of children hanging from this group.") # `_v_filters` is a direct read-write shorthand for the ``FILTERS`` # attribute with the default `Filters` instance as a default value. def _g_getfilters(self): filters = getattr(self._v_attrs, 'FILTERS', None) if filters is None: filters = Filters() return filters def _g_setfilters(self, value): if not isinstance(value, Filters): raise TypeError("value is not an instance of `Filters`: %r" % (value, )) self._v_attrs.FILTERS = value def _g_delfilters(self): del self._v_attrs.FILTERS _v_filters = property( _g_getfilters, _g_setfilters, _g_delfilters, """Default filter properties for child nodes. You can (and are encouraged to) use this property to get, set and delete the FILTERS HDF5 attribute of the group, which stores a Filters instance (see :ref:`FiltersClassDescr`). When the group has no such attribute, a default Filters instance is used. """) _v_maxGroupWidth = previous_api_property('_v_max_group_width') def __init__(self, parentnode, name, title="", new=False, filters=None, _log=True): # Remember to assign these values in the root group constructor # if it does not use this one! # First, set attributes belonging to group objects. self._v_version = obversion """The object version of this group.""" self._v_new = new """Is this the first time the node has been created?""" self._v_new_title = title """New title for this node.""" self._v_new_filters = filters """New default filter properties for child nodes.""" self._v_max_group_width = parentnode._v_file.params['MAX_GROUP_WIDTH'] """Maximum number of children on each group before warning the user. .. versionchanged:: 3.0 The *_v_maxGroupWidth* attribute has been renamed into *_v_max_group_width*. """ # Finally, set up this object as a node. super(Group, self).__init__(parentnode, name, _log) def _g_post_init_hook(self): if self._v_new: if self._v_file.params['PYTABLES_SYS_ATTRS']: # Save some attributes for the new group on disk. set_attr = self._v_attrs._g__setattr # Set the title, class and version attributes. set_attr('TITLE', self._v_new_title) set_attr('CLASS', self._c_classid) set_attr('VERSION', self._v_version) # Set the default filter properties. newfilters = self._v_new_filters if newfilters is None: # If no filters have been passed in the constructor, # inherit them from the parent group, but only if they # have been inherited or explicitly set. 
newfilters = getattr(self._v_parent._v_attrs, 'FILTERS', None) if newfilters is not None: set_attr('FILTERS', newfilters) else: # If the file has PyTables format, get the VERSION attr if 'VERSION' in self._v_attrs._v_attrnamessys: self._v_version = self._v_attrs.VERSION else: self._v_version = "0.0 (unknown)" # We don't need to get more attributes from disk, # since the most important ones are defined as properties. _g_postInitHook = previous_api(_g_post_init_hook) def __del__(self): if (self._v_isopen and self._v_pathname in self._v_file._node_manager.registry and '_v_children' in self.__dict__): # The group is going to be killed. Rebuild weak references # (that Python cancelled just before calling this method) so # that they are still usable if the object is revived later. selfref = weakref.ref(self) self._v_children.containerref = selfref self._v_groups.containerref = selfref self._v_leaves.containerref = selfref self._v_links.containerref = selfref self._v_unknown.containerref = selfref self._v_hidden.containerref = selfref super(Group, self).__del__() def _g_get_child_group_class(self, childname): """Get the class of a not-yet-loaded group child. `childname` must be the name of a *group* child. """ childCID = self._g_get_gchild_attr(childname, 'CLASS') if childCID is not None and not isinstance(childCID, str): childCID = childCID.decode('utf-8') if childCID in class_id_dict: return class_id_dict[childCID] # look up group class else: return Group # default group class _g_getChildGroupClass = previous_api(_g_get_child_group_class) def _g_get_child_leaf_class(self, childname, warn=True): """Get the class of a not-yet-loaded leaf child. `childname` must be the name of a *leaf* child. If the child belongs to an unknown kind of leaf, or if its kind can not be guessed, `UnImplemented` will be returned and a warning will be issued if `warn` is true. """ if self._v_file.params['PYTABLES_SYS_ATTRS']: childCID = self._g_get_lchild_attr(childname, 'CLASS') if childCID is not None and not isinstance(childCID, str): childCID = childCID.decode('utf-8') else: childCID = None if childCID in class_id_dict: return class_id_dict[childCID] # look up leaf class else: # Unknown or no ``CLASS`` attribute, try a guess. childCID2 = utilsextension.which_class(self._v_objectid, childname) if childCID2 == 'UNSUPPORTED': if warn: if childCID is None: warnings.warn( "leaf ``%s`` is of an unsupported type; " "it will become an ``UnImplemented`` node" % self._g_join(childname)) else: warnings.warn( ("leaf ``%s`` has an unknown class ID ``%s``; " "it will become an ``UnImplemented`` node") % (self._g_join(childname), childCID)) return UnImplemented assert childCID2 in class_id_dict return class_id_dict[childCID2] # look up leaf class _g_getChildLeafClass = previous_api(_g_get_child_leaf_class) def _g_add_children_names(self): """Add children names to this group taking into account their visibility and kind.""" mydict = self.__dict__ # The names of the lazy attributes mydict['__members__'] = members = [] """The names of visible children nodes for readline-style completion. 
""" mydict['_v_children'] = children = _ChildrenDict(self) """The number of children hanging from this group.""" mydict['_v_groups'] = groups = _ChildrenDict(self) """Dictionary with all groups hanging from this group.""" mydict['_v_leaves'] = leaves = _ChildrenDict(self) """Dictionary with all leaves hanging from this group.""" mydict['_v_links'] = links = _ChildrenDict(self) """Dictionary with all links hanging from this group.""" mydict['_v_unknown'] = unknown = _ChildrenDict(self) """Dictionary with all unknown nodes hanging from this group.""" mydict['_v_hidden'] = hidden = _ChildrenDict(self) """Dictionary with all hidden nodes hanging from this group.""" # Get the names of *all* child groups and leaves. (group_names, leaf_names, link_names, unknown_names) = \ self._g_list_group(self._v_parent) # Separate groups into visible groups and hidden nodes, # and leaves into visible leaves and hidden nodes. for (childnames, childdict) in ((group_names, groups), (leaf_names, leaves), (link_names, links), (unknown_names, unknown)): for childname in childnames: # See whether the name implies that the node is hidden. # (Assigned values are entirely irrelevant.) if isvisiblename(childname): # Visible node. members.insert(0, childname) children[childname] = None childdict[childname] = None else: # Hidden node. hidden[childname] = None _g_addChildrenNames = previous_api(_g_add_children_names) def _g_check_has_child(self, name): """Check whether 'name' is a children of 'self' and return its type.""" # Get the HDF5 name matching the PyTables name. node_type = self._g_get_objinfo(name) if node_type == "NoSuchNode": raise NoSuchNodeError( "group ``%s`` does not have a child named ``%s``" % (self._v_pathname, name)) return node_type _g_checkHasChild = previous_api(_g_check_has_child) def __iter__(self): """Iterate over the child nodes hanging directly from the group. This iterator is *not* recursive. Examples -------- :: # Non-recursively list all the nodes hanging from '/detector' print("Nodes in '/detector' group:") for node in h5file.root.detector: print(node) """ return self._f_iter_nodes() def __contains__(self, name): """Is there a child with that `name`? Returns a true value if the group has a child node (visible or hidden) with the given `name` (a string), false otherwise. """ self._g_check_open() try: self._g_check_has_child(name) except NoSuchNodeError: return False return True def _f_walknodes(self, classname=None): """Iterate over descendant nodes. This method recursively walks *self* top to bottom (preorder), iterating over child groups in alphanumerical order, and yielding nodes. If classname is supplied, only instances of the named class are yielded. If *classname* is Group, it behaves like :meth:`Group._f_walk_groups`, yielding only groups. If you don't want a recursive behavior, use :meth:`Group._f_iter_nodes` instead. Examples -------- :: # Recursively print all the arrays hanging from '/' print("Arrays in the object tree '/':") for array in h5file.root._f_walknodes('Array', recursive=True): print(array) """ self._g_check_open() # For compatibility with old default arguments. 
        if classname == '':
            classname = None

        if classname == "Group":
            # Recursive algorithm
            for group in self._f_walk_groups():
                yield group
        else:
            for group in self._f_walk_groups():
                for leaf in group._f_iter_nodes(classname):
                    yield leaf

    _f_walkNodes = previous_api(_f_walknodes)

    def _g_join(self, name):
        """Helper method to correctly concatenate a child node name with
        the pathname of this group."""

        if name == "/":
            # This case can happen when doing copies
            return self._v_pathname
        return join_path(self._v_pathname, name)

    def _g_width_warning(self):
        """Issue a :exc:`PerformanceWarning` on too many children."""

        warnings.warn("""\
group ``%s`` is exceeding the recommended maximum number of children (%d); \
be ready to see PyTables asking for *lots* of memory and possibly slow I/O."""
                      % (self._v_pathname, self._v_max_group_width),
                      PerformanceWarning)

    _g_widthWarning = previous_api(_g_width_warning)

    def _g_refnode(self, childnode, childname, validate=True):
        """Insert references to a `childnode` via a `childname`.

        Checks that the `childname` is valid and does not exist, then
        creates references to the given `childnode` by that `childname`.
        The validation of the name can be omitted by setting `validate`
        to a false value (this may be useful for adding already existing
        nodes to the tree).

        """

        # Check for name validity.
        if validate:
            check_name_validity(childname)
            childnode._g_check_name(childname)

        # Check if there is already a child with the same name.
        #
        # This can be triggered because of the user
        # (via node construction or renaming/movement).
        # Links are not checked here because they are copied and referenced
        # using ``File.get_node`` so they already exist in `self`.
        if (not isinstance(childnode, Link)) and childname in self:
            raise NodeError(
                "group ``%s`` already has a child node named ``%s``"
                % (self._v_pathname, childname))

        # Show a warning if there is an object attribute with that name.
        if childname in self.__dict__:
            warnings.warn(
                "group ``%s`` already has an attribute named ``%s``; "
                "you will not be able to use natural naming "
                "to access the child node"
                % (self._v_pathname, childname), NaturalNameWarning)

        # Check group width limits.
        if (len(self._v_children) + len(self._v_hidden)
                >= self._v_max_group_width):
            self._g_width_warning()

        # Update members information.
        # Insert references to the new child.
        # (Assigned values are entirely irrelevant.)
        if isvisiblename(childname):
            # Visible node.
            self.__members__.insert(0, childname)  # enable completion
            self._v_children[childname] = None  # insert node
            if isinstance(childnode, Unknown):
                self._v_unknown[childname] = None
            elif isinstance(childnode, Link):
                self._v_links[childname] = None
            elif isinstance(childnode, Leaf):
                self._v_leaves[childname] = None
            elif isinstance(childnode, Group):
                self._v_groups[childname] = None
        else:
            # Hidden node.
            self._v_hidden[childname] = None  # insert node

    _g_refNode = previous_api(_g_refnode)

    def _g_unrefnode(self, childname):
        """Remove references to a node.

        Removes all references to the named node.

        """

        # This can *not* be triggered because of the user.
        assert childname in self, \
            ("group ``%s`` does not have a child node named ``%s``"
             % (self._v_pathname, childname))

        # Update members information, if needed
        if '_v_children' in self.__dict__:
            if childname in self._v_children:
                # Visible node.
members = self.__members__ member_index = members.index(childname) del members[member_index] # disables completion del self._v_children[childname] # remove node self._v_unknown.pop(childname, None) self._v_links.pop(childname, None) self._v_leaves.pop(childname, None) self._v_groups.pop(childname, None) else: # Hidden node. del self._v_hidden[childname] # remove node _g_unrefNode = previous_api(_g_unrefnode) def _g_move(self, newparent, newname): # Move the node to the new location. oldpath = self._v_pathname super(Group, self)._g_move(newparent, newname) newpath = self._v_pathname # Update location information in children. This node shouldn't # be affected since it has already been relocated. self._v_file._update_node_locations(oldpath, newpath) def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs): # Compute default arguments. title = kwargs.get('title', self._v_title) filters = kwargs.get('filters', None) stats = kwargs.get('stats', None) # Fix arguments with explicit None values for backwards compatibility. if title is None: title = self._v_title # If no filters have been passed to the call, copy them from the # source group, but only if inherited or explicitly set. if filters is None: filters = getattr(self._v_attrs, 'FILTERS', None) # Create a copy of the object. new_node = Group(newparent, newname, title, new=True, filters=filters, _log=_log) # Copy user attributes if needed. if kwargs.get('copyuserattrs', True): self._v_attrs._g_copy(new_node._v_attrs, copyclass=True) # Update statistics if needed. if stats is not None: stats['groups'] += 1 if recursive: # Copy child nodes if a recursive copy was requested. # Some arguments should *not* be passed to children copy ops. kwargs = kwargs.copy() kwargs.pop('title', None) self._g_copy_children(new_node, **kwargs) return new_node def _g_copy_children(self, newparent, **kwargs): """Copy child nodes. Copies all nodes descending from this one into the specified `newparent`. If the new parent has a child node with the same name as one of the nodes in this group, the copy fails with a `NodeError`, maybe resulting in a partial copy. Nothing is logged. """ # Recursive version of children copy. # for srcchild in self._v_children.itervalues(): ## srcchild._g_copy_as_child(newparent, **kwargs) # Non-recursive version of children copy. use_hardlinks = kwargs.get('use_hardlinks', False) if use_hardlinks: address_map = kwargs.setdefault('address_map', {}) parentstack = [(self, newparent)] # [(source, destination), ...] while parentstack: (srcparent, dstparent) = parentstack.pop() if use_hardlinks: for srcchild in srcparent._v_children.itervalues(): addr, rc = srcchild._get_obj_info() if rc > 1 and addr in address_map: where, name = address_map[addr][0] localsrc = os.path.join(where, name) dstparent._v_file.create_hard_link( dstparent, srcchild.name, localsrc) address_map[addr].append( (dstparent._v_pathname, srcchild.name)) # Update statistics if needed. 
                    stats = kwargs.pop('stats', None)
                    if stats is not None:
                        stats['hardlinks'] += 1
                else:
                    dstchild = srcchild._g_copy_as_child(dstparent, **kwargs)
                    if isinstance(srcchild, Group):
                        parentstack.append((srcchild, dstchild))
                    if rc > 1:
                        address_map[addr] = [
                            (dstparent._v_pathname, srcchild.name)
                        ]
        else:
            for srcchild in srcparent._v_children.itervalues():
                dstchild = srcchild._g_copy_as_child(dstparent, **kwargs)
                if isinstance(srcchild, Group):
                    parentstack.append((srcchild, dstchild))

    _g_copyChildren = previous_api(_g_copy_children)

    def _f_get_child(self, childname):
        """Get the child called childname of this group.

        If the child exists (be it visible or not), it is returned.
        Else, a NoSuchNodeError is raised.

        Using this method is recommended over getattr() when doing
        programmatic accesses to children if childname is unknown
        beforehand or when its name is not a valid Python identifier.

        """

        self._g_check_open()

        self._g_check_has_child(childname)

        childpath = join_path(self._v_pathname, childname)
        return self._v_file._get_node(childpath)

    _f_getChild = previous_api(_f_get_child)

    def _f_list_nodes(self, classname=None):
        """Return a *list* with children nodes.

        This is a list-returning version of :meth:`Group._f_iter_nodes()`.

        """

        return list(self._f_iter_nodes(classname))

    _f_listNodes = previous_api(_f_list_nodes)

    def _f_iter_nodes(self, classname=None):
        """Iterate over children nodes.

        Child nodes are yielded alphanumerically sorted by node name.  If
        the name of a class derived from Node (see :ref:`NodeClassDescr`)
        is supplied in the classname parameter, only instances of that
        class (or subclasses of it) will be returned.

        This is an iterator version of :meth:`Group._f_list_nodes`.

        """

        self._g_check_open()

        if not classname:
            # Returns all the children alphanumerically sorted
            names = sorted(self._v_children.iterkeys())
            for name in names:
                yield self._v_children[name]
        elif classname == 'Group':
            # Returns all the groups alphanumerically sorted
            names = sorted(self._v_groups.iterkeys())
            for name in names:
                yield self._v_groups[name]
        elif classname == 'Leaf':
            # Returns all the leaves alphanumerically sorted
            names = sorted(self._v_leaves.iterkeys())
            for name in names:
                yield self._v_leaves[name]
        elif classname == 'Link':
            # Returns all the links alphanumerically sorted
            names = sorted(self._v_links.iterkeys())
            for name in names:
                yield self._v_links[name]
        elif classname == 'IndexArray':
            raise TypeError("listing ``IndexArray`` nodes is not allowed")
        else:
            class_ = get_class_by_name(classname)
            children = self._v_children
            childnames = sorted(children.iterkeys())
            for childname in childnames:
                childnode = children[childname]
                if isinstance(childnode, class_):
                    yield childnode

    _f_iterNodes = previous_api(_f_iter_nodes)

    def _f_walk_groups(self):
        """Recursively iterate over descendent groups (not leaves).

        This method starts by yielding *self*, and then it goes on to
        recursively iterate over all child groups in alphanumerical order,
        top to bottom (preorder), following the same procedure.

        """

        self._g_check_open()

        stack = [self]
        yield self
        # Iterate over the descendants
        while stack:
            objgroup = stack.pop()
            # Sort the child groups by name before delivering them, so
            # that they come out in alphanumerical order.
            groupnames = sorted(objgroup._v_groups.iterkeys())
            for groupname in groupnames:
                stack.append(objgroup._v_groups[groupname])
                yield objgroup._v_groups[groupname]

    _f_walkGroups = previous_api(_f_walk_groups)

    def __delattr__(self, name):
        """Delete a Python attribute called name.
        This method deletes an *ordinary Python attribute* from the object.
        It does *not* remove children nodes from this group; for that,
        use :meth:`File.remove_node` or :meth:`Node._f_remove`.
        Nor does it delete a PyTables node attribute; for that,
        use :meth:`File.del_node_attr`, :meth:`Node._f_delattr` or
        :attr:`Node._v_attrs`.

        If there is an attribute and a child node with the same name,
        the child node will be made accessible again via natural naming.

        """

        try:
            super(Group, self).__delattr__(name)  # nothing particular
        except AttributeError as ae:
            hint = " (use ``node._f_remove()`` if you want to remove a node)"
            raise ae.__class__(str(ae) + hint)

    def __getattr__(self, name):
        """Get a Python attribute or child node called name.

        If the object has a Python attribute called name, its value is
        returned.  Else, if the node has a child node called name, it is
        returned.  Else, an AttributeError is raised.

        """

        # That is true since a `NoSuchNodeError` is an `AttributeError`.
        mydict = self.__dict__
        if name in mydict:
            return mydict[name]
        elif name in self._c_lazy_children_attrs:
            self._g_add_children_names()
            return mydict[name]
        return self._f_get_child(name)

    def __setattr__(self, name, value):
        """Set a Python attribute called name with the given value.

        This method stores an *ordinary Python attribute* in the object.
        It does *not* store new children nodes under this group; for that,
        use the File.create*() methods (see the File class in
        :ref:`FileClassDescr`).  Nor does it store a PyTables node
        attribute; for that, use :meth:`File.set_node_attr`,
        :meth:`Node._f_setattr` or :attr:`Node._v_attrs`.

        If there is already a child node with the same name, a
        NaturalNameWarning will be issued and the child node will not be
        accessible via natural naming nor getattr().  It will still be
        available via :meth:`File.get_node`, :meth:`Group._f_get_child`
        and children dictionaries in the group (if visible).

        """

        # Show a warning if there is a child node with that name.
        #
        # ..note::
        #
        #   Using ``if name in self:`` is not right since that would
        #   require ``_v_children`` and ``_v_hidden`` to be already set
        #   when the very first attribute assignments are made.
        #   Moreover, this warning is only concerned about clashes with
        #   names used in natural naming, i.e. those in ``__members__``.
        #
        # ..note::
        #
        #   The check ``'__members__' in mydict`` allows attribute
        #   assignment to happen before calling `Group.__init__()`, by
        #   avoiding a lookup into the still unassigned ``__members__``
        #   attribute.  This allows subclasses to set up some attributes
        #   and then call the constructor of the superclass.  If the
        #   check above is disabled, that results in Python entering an
        #   endless loop on exit!

        mydict = self.__dict__
        if '__members__' in mydict and name in self.__members__:
            warnings.warn(
                "group ``%s`` already has a child node named ``%s``; "
                "you will not be able to use natural naming "
                "to access the child node"
                % (self._v_pathname, name), NaturalNameWarning)

        super(Group, self).__setattr__(name, value)

    def _f_flush(self):
        """Flush this Group."""

        self._g_check_open()
        self._g_flush_group()

    def _g_close_descendents(self):
        """Close all the *loaded* descendent nodes of this group."""

        node_manager = self._v_file._node_manager
        node_manager.close_subtree(self._v_pathname)

    _g_closeDescendents = previous_api(_g_close_descendents)

    def _g_close(self):
        """Close this (open) group."""

        if self._v_isopen:
            # hdf5extension operations:
            #   Close HDF5 group.
            self._g_close_group()

        # Close myself as a node.
        super(Group, self)._f_close()

    def _f_close(self):
        """Close this group and all its descendents.

        This method has the behavior described in :meth:`Node._f_close`.
        It should be noted that this operation closes all the nodes
        descending from this group.

        You should not need to close nodes manually because they are
        automatically opened/closed when they are loaded/evicted from
        the integrated LRU cache.

        """

        # If the group is already closed, return immediately
        if not self._v_isopen:
            return

        # First, close all the descendents of this group, unless a) the
        # group is being deleted (evicted from LRU cache) or b) the node
        # is being closed during an aborted creation, in which cases
        # this is not an explicit close issued by the user.
        if not (self._v__deleting or self._v_objectid is None):
            self._g_close_descendents()

        # When all the descendents have been closed, close this group.
        # This is done at the end because some nodes may still need to
        # be loaded during the closing process; thus this node must be
        # open until the very end.
        self._g_close()

    def _g_remove(self, recursive=False, force=False):
        """Remove (recursively if needed) the Group.

        This version correctly handles both visible and hidden nodes.

        """

        if self._v_nchildren > 0:
            if not (recursive or force):
                raise NodeError("group ``%s`` has child nodes; "
                                "please set `recursive` or `force` to true "
                                "to remove it"
                                % (self._v_pathname, ))

            # First close all the descendents hanging from this group,
            # so that it is not possible to use a node that no longer exists.
            self._g_close_descendents()

        # Remove the node itself from the hierarchy.
        super(Group, self)._g_remove(recursive, force)

    def _f_copy(self, newparent=None, newname=None,
                overwrite=False, recursive=False, createparents=False,
                **kwargs):
        """Copy this node and return the new one.

        This method has the behavior described in :meth:`Node._f_copy`.
        In addition, it recognizes the following keyword arguments:

        Parameters
        ----------
        title
            The new title for the destination.  If omitted or None, the
            original title is used.  This only applies to the topmost
            node in recursive copies.
        filters : Filters
            Specifying this parameter overrides the original filter
            properties in the source node.  If specified, it must be an
            instance of the Filters class (see :ref:`FiltersClassDescr`).
            The default is to copy the filter properties from the source
            node.
        copyuserattrs
            You can prevent the user attributes from being copied by
            setting this parameter to False.  The default is to copy them.
        stats
            This argument may be used to collect statistics on the copy
            process.  When used, it should be a dictionary with keys
            'groups', 'leaves', 'links' and 'bytes' having a numeric
            value.  Their values will be incremented to reflect the
            number of groups, leaves, links and bytes, respectively,
            that have been copied during the operation.

        """

        return super(Group, self)._f_copy(
            newparent, newname,
            overwrite, recursive, createparents, **kwargs)

    def _f_copy_children(self, dstgroup, overwrite=False, recursive=False,
                         createparents=False, **kwargs):
        """Copy the children of this group into another group.

        Children hanging directly from this group are copied into
        dstgroup, which can be a Group (see :ref:`GroupClassDescr`)
        object or its pathname in string form.  If createparents is
        true, the needed groups for the given destination group path
        to exist will be created.
        The operation will fail with a NodeError if there is a child node
        in the destination group with the same name as one of the copied
        children from this one, unless overwrite is true; in this case,
        the former child node is recursively removed before copying the
        latter.

        By default, nodes descending from children groups of this node
        are not copied.  If the recursive argument is true, all descendant
        nodes of this node are recursively copied.

        Additional keyword arguments may be passed to customize the
        copying process.  For instance, title and filters may be changed,
        user attributes may or may not be copied, data may be sub-sampled,
        stats may be collected, etc.  Arguments unknown to nodes are
        simply ignored.  Check the documentation for copying operations
        of nodes to see which options they support.

        """

        self._g_check_open()

        # `dstgroup` is used instead of its path to avoid accepting
        # `Node` objects when `createparents` is true.  Also, note that
        # there is no risk of creating parent nodes and failing later
        # because of destination nodes already existing.
        dstparent = self._v_file._get_or_create_path(dstgroup, createparents)
        self._g_check_group(dstparent)  # Is it a group?

        if not overwrite:
            # Abort as early as possible when destination nodes exist
            # and overwriting is not enabled.
            for childname in self._v_children:
                if childname in dstparent:
                    raise NodeError(
                        "destination group ``%s`` already has "
                        "a node named ``%s``; "
                        "you may want to use the ``overwrite`` argument"
                        % (dstparent._v_pathname, childname))

        use_hardlinks = kwargs.get('use_hardlinks', False)
        if use_hardlinks:
            address_map = kwargs.setdefault('address_map', {})

            for child in self._v_children.itervalues():
                addr, rc = child._get_obj_info()
                if rc > 1 and addr in address_map:
                    where, name = address_map[addr][0]
                    localsrc = os.path.join(where, name)
                    dstparent._v_file.create_hard_link(dstparent, child.name,
                                                       localsrc)
                    address_map[addr].append(
                        (dstparent._v_pathname, child.name)
                    )

                    # Update statistics if needed.
                    stats = kwargs.pop('stats', None)
                    if stats is not None:
                        stats['hardlinks'] += 1
                else:
                    child._f_copy(dstparent, None, overwrite, recursive,
                                  **kwargs)
                    if rc > 1:
                        address_map[addr] = [
                            (dstparent._v_pathname, child.name)
                        ]
        else:
            for child in self._v_children.itervalues():
                child._f_copy(dstparent, None, overwrite, recursive, **kwargs)

    _f_copyChildren = previous_api(_f_copy_children)

    def __str__(self):
        """Return a short string representation of the group.

        Examples
        --------

        ::

            >>> f = tables.open_file('data/test.h5')
            >>> print(f.root.group0)
            /group0 (Group) 'First Group'

        """

        pathname = self._v_pathname
        classname = self.__class__.__name__
        title = self._v_title
        return "%s (%s) %r" % (pathname, classname, title)

    def __repr__(self):
        """Return a detailed string representation of the group.

        Examples
        --------

        ::

            >>> f = tables.open_file('data/test.h5')
            >>> f.root.group0
            /group0 (Group) 'First Group'
              children := ['tuple1' (Table), 'group1' (Group)]

        """

        rep = [
            '%r (%s)' % (childname, child.__class__.__name__)
            for (childname, child) in self._v_children.iteritems()
        ]
        childlist = '[%s]' % (', '.join(rep))
        return "%s\n  children := %s" % (str(self), childlist)
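# A minimal usage sketch for the ``Group`` API defined above.  The file and
# node names (``demo.h5``, ``detector``, ``counts``) are made up for
# illustration and are not part of this module:
#
#     import tables
#
#     with tables.open_file('demo.h5', mode='w') as h5file:
#         group = h5file.create_group('/', 'detector', 'Detector data')
#         h5file.create_array(group, 'counts', [1, 2, 3])
#         # Natural naming plus Group.__iter__:
#         for node in h5file.root.detector:
#             print(node)
#         # Preorder walk, as implemented by _f_walk_groups()/_f_walknodes():
#         for g in h5file.root._f_walk_groups():
#             print(g)
#         # _f_get_child() is the safe way to fetch a child whose name is
#         # only known at run time:
#         counts = h5file.root.detector._f_get_child('counts')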
class RootGroup(Group):

    _v_objectId = previous_api_property('_v_objectid')

    def __init__(self, ptfile, name, title, new, filters):
        mydict = self.__dict__

        # Set group attributes.
        self._v_version = obversion
        self._v_new = new
        if new:
            self._v_new_title = title
            self._v_new_filters = filters
        else:
            self._v_new_title = None
            self._v_new_filters = None

        # Set node attributes.
        self._v_file = ptfile
        self._v_isopen = True  # root is always open
        self._v_pathname = '/'
        self._v_name = '/'
        self._v_depth = 0
        self._v_max_group_width = ptfile.params['MAX_GROUP_WIDTH']
        self._v__deleting = False
        self._v_objectid = None  # later

        # Only the root node has the file as a parent.
        # Bypass __setattr__ to avoid the ``Node._v_parent`` property.
        mydict['_v_parent'] = ptfile
        ptfile._node_manager.register_node(self, '/')

        # hdf5extension operations (do before setting an AttributeSet):
        #   Update node attributes.
        self._g_new(ptfile, name, init=True)
        #   Open the node and get its object ID.
        self._v_objectid = self._g_open()

        # Set disk attributes and read children names.
        #
        # This *must* be postponed because this method needs the root node
        # to be created and bound to ``File.root``.
        # This is an exception to the rule, handled by ``File.__init__()``.
        #
        # self._g_post_init_hook()

    def _g_load_child(self, childname):
        """Load a child node from disk.

        The child node `childname` is loaded from disk and an adequate
        `Node` object is created and returned.  If there is no such child,
        a `NoSuchNodeError` is raised.

        """

        if self._v_file.root_uep != "/":
            childname = join_path(self._v_file.root_uep, childname)
        # Is the node a group or a leaf?
        node_type = self._g_check_has_child(childname)

        # Nodes that HDF5 reports as H5G_UNKNOWN
        if node_type == 'Unknown':
            return Unknown(self, childname)

        # Guess the PyTables class suited to the node,
        # build a PyTables node and return it.
        if node_type == "Group":
            if self._v_file.params['PYTABLES_SYS_ATTRS']:
                ChildClass = self._g_get_child_group_class(childname)
            else:
                # Default is a Group class
                ChildClass = Group
            return ChildClass(self, childname, new=False)
        elif node_type == "Leaf":
            ChildClass = self._g_get_child_leaf_class(childname, warn=True)
            # Building a leaf may still fail because of unsupported types
            # and other causes.
            # return ChildClass(self, childname)  # uncomment for debugging
            try:
                return ChildClass(self, childname)
            except Exception as exc:  # XXX
                warnings.warn(
                    "problems loading leaf ``%s``::\n\n"
                    "  %s\n\n"
                    "The leaf will become an ``UnImplemented`` node."
                    % (self._g_join(childname), exc))
                # If not, associate an UnImplemented object to it
                return UnImplemented(self, childname)
        elif node_type == "SoftLink":
            return SoftLink(self, childname)
        elif node_type == "ExternalLink":
            return ExternalLink(self, childname)
        else:
            return UnImplemented(self, childname)

    _g_loadChild = previous_api(_g_load_child)

    def _f_rename(self, newname):
        raise NodeError("the root node can not be renamed")

    def _f_move(self, newparent=None, newname=None, createparents=False):
        raise NodeError("the root node can not be moved")

    def _f_remove(self, recursive=False):
        raise NodeError("the root node can not be removed")
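# The ``root_uep`` remapping in ``RootGroup._g_load_child()`` above is what
# allows mounting only a subtree of a file as the root node.  A hedged
# sketch (the file and group names are hypothetical):
#
#     import tables
#
#     # '/group1' inside mounted.h5 now appears as the root node '/':
#     f = tables.open_file('mounted.h5', root_uep='/group1')
#     print(f.root._v_pathname)   # -> '/'
#     f.close()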
class UnImplemented(hdf5extension.UnImplemented, Leaf): """This class represents datasets not supported by PyTables in an HDF5 file. When reading a generic HDF5 file (i.e. one that has not been created with PyTables, but with some other HDF5 library based tool), chances are that the specific combination of datatypes or dataspaces in some dataset might not be supported by PyTables yet. In such a case, this dataset will be mapped into an UnImplemented instance and the user will still be able to access the complete object tree of the generic HDF5 file. The user will also be able to *read and write the attributes* of the dataset, *access some of its metadata*, and perform *certain hierarchy manipulation operations* like deleting or moving (but not copying) the node. Of course, the user will not be able to read the actual data on it. This is an elegant way to allow users to work with generic HDF5 files despite the fact that some of its datasets are not supported by PyTables. However, if you are really interested in having full access to an unimplemented dataset, please get in contact with the developer team. This class does not have any public instance variables or methods, except those inherited from the Leaf class (see :ref:`LeafClassDescr`). """ # Class identifier. _c_classid = 'UNIMPLEMENTED' _c_classId = previous_api_property('_c_classid') def __init__(self, parentnode, name): """Create the `UnImplemented` instance.""" # UnImplemented objects always come from opening an existing node # (they can not be created). self._v_new = False """Is this the first time the node has been created?""" self.nrows = SizeType(0) """The length of the first dimension of the data.""" self.shape = (SizeType(0), ) """The shape of the stored data.""" self.byteorder = None """The endianness of data in memory ('big', 'little' or 'irrelevant').""" super(UnImplemented, self).__init__(parentnode, name) def _g_open(self): (self.shape, self.byteorder, object_id) = self._open_unimplemented() try: self.nrows = SizeType(self.shape[0]) except IndexError: self.nrows = SizeType(0) return object_id def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs): """Do nothing. This method does nothing, but a ``UserWarning`` is issued. Please note that this method *does not return a new node*, but ``None``. """ warnings.warn( "UnImplemented node %r does not know how to copy itself; skipping" % (self._v_pathname, )) return None # Can you see it? def _f_copy(self, newparent=None, newname=None, overwrite=False, recursive=False, createparents=False, **kwargs): """Do nothing. This method does nothing, since `UnImplemented` nodes can not be copied. However, a ``UserWarning`` is issued. Please note that this method *does not return a new node*, but ``None``. """ # This also does nothing but warn. self._g_copy(newparent, newname, recursive, **kwargs) return None # Can you see it? def __repr__(self): return """%s NOTE: <The UnImplemented object represents a PyTables unimplemented dataset present in the '%s' HDF5 file. If you want to see this kind of HDF5 dataset implemented in PyTables, please contact the developers.> """ % (str(self), self._v_file.filename)
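# A sketch of how ``UnImplemented`` nodes typically show up when browsing a
# foreign HDF5 file (the file name is hypothetical; this assumes the
# ``UnImplemented`` class name is registered for ``File.walk_nodes()``).
# Attributes and hierarchy operations keep working; reading the data does
# not:
#
#     import tables
#
#     with tables.open_file('foreign.h5') as f:
#         for leaf in f.walk_nodes('/', classname='UnImplemented'):
#             print(leaf)            # pathname; a warning was issued on load
#             print(leaf._v_attrs)   # attributes are still readable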
class OldIndexArray(UnImplemented): _c_classid = 'IndexArray' _c_classId = previous_api_property('_c_classid')
class ProxyDict(dict):
    """A dictionary which uses a container object to store its values."""

    containerRef = previous_api_property('containerref')

    def __init__(self, container):
        self.containerref = weakref.ref(container)
        """A weak reference to the container object.

        .. versionchanged:: 3.0
           The *containerRef* attribute has been renamed into
           *containerref*.

        """

    def __getitem__(self, key):
        if key not in self:
            raise KeyError(key)

        # Values are not actually stored to avoid extra references.
        return self._get_value_from_container(self._get_container(), key)

    def __setitem__(self, key, value):
        # Values are not actually stored to avoid extra references.
        super(ProxyDict, self).__setitem__(key, None)

    def __repr__(self):
        return object.__repr__(self)

    def __str__(self):
        # C implementation does not use `self.__getitem__()`. :(
        itemFormat = '%r: %r'
        itemReprs = [itemFormat % item for item in self.iteritems()]
        return '{%s}' % ', '.join(itemReprs)

    def values(self):
        # C implementation does not use `self.__getitem__()`. :(
        valueList = []
        for key in self.iterkeys():
            valueList.append(self[key])
        return valueList

    def itervalues(self):
        # C implementation does not use `self.__getitem__()`. :(
        for key in self.iterkeys():
            yield self[key]

    def items(self):
        # C implementation does not use `self.__getitem__()`. :(
        itemList = []
        for key in self.iterkeys():
            itemList.append((key, self[key]))
        return itemList

    def iteritems(self):
        # C implementation does not use `self.__getitem__()`. :(
        for key in self.iterkeys():
            yield (key, self[key])

    def _get_container(self):
        container = self.containerref()
        if container is None:
            raise ValueError("the container object no longer exists")
        return container

    _getContainer = previous_api(_get_container)
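# ``ProxyDict`` stores only keys; concrete subclasses supply the values by
# implementing ``_get_value_from_container()``.  A toy subclass illustrating
# the pattern (``_NameDict`` and ``lookup`` are made-up names, not part of
# PyTables):
#
#     class _NameDict(ProxyDict):
#         def _get_value_from_container(self, container, key):
#             # Re-derive the value from the container on every access,
#             # so the dict itself holds no extra references to it.
#             return container.lookup(key)
#
# The ``_ChildrenDict`` used by ``Group`` works this way: its values are
# child nodes fetched on demand from the owning group, so no strong
# references to node objects accumulate in the dictionary.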
class SoftLink(linkextension.SoftLink, Link): """Represents a soft link (aka symbolic link). A soft link is a reference to another node in the *same* file hierarchy. Provided that the target node exists, its attributes and methods can be accessed directly from the softlink using the normal `.` syntax. Softlinks also have the following public methods/attributes: * `target` * `dereference()` * `copy()` * `move()` * `remove()` * `rename()` * `is_dangling()` Note that these will override any correspondingly named methods/attributes of the target node. For backwards compatibility, it is also possible to obtain the target node via the `__call__()` special method (this action is called *dereferencing*; see below) Examples -------- :: >>> f = tables.open_file('/tmp/test_softlink.h5', 'w') >>> a = f.create_array('/', 'A', np.arange(10)) >>> link_a = f.create_soft_link('/', 'link_A', target='/A') # transparent read/write access to a softlinked node >>> link_a[0] = -1 >>> print(link_a[:], link_a.dtype) (array([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]), dtype('int64')) # dereferencing a softlink using the __call__() method >>> print(link_a() is a) True # SoftLink.remove() overrides Array.remove() >>> link_a.remove() >>> print(link_a) <closed tables.link.SoftLink at 0x7febe97186e0> >>> print(a[:], a.dtype) (array([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]), dtype('int64')) """ # Class identifier. _c_classid = 'SOFTLINK' _c_classId = previous_api_property('_c_classid') # attributes with these names/prefixes are treated as attributes of the # SoftLink rather than the target node _link_attrnames = ('target', 'dereference', 'is_dangling', 'copy', 'move', 'remove', 'rename', '__init__', '__str__', '__repr__', '__class__', '__dict__') _link_attrprefixes = ('_f_', '_c_', '_g_', '_v_') def __call__(self): """Dereference `self.target` and return the object. Examples -------- :: >>> f=tables.open_file('data/test.h5') >>> print(f.root.link0) /link0 (SoftLink) -> /another/path >>> print(f.root.link0()) /another/path (Group) '' """ return self.dereference() def dereference(self): if self._v_isopen: target = self.target # Check for relative pathnames if not self.target.startswith('/'): target = self._v_parent._g_join(self.target) return self._v_file._get_node(target) else: return None def __getattribute__(self, attrname): # get attribute of the SoftLink itself if (attrname in SoftLink._link_attrnames or attrname[:3] in SoftLink._link_attrprefixes): return object.__getattribute__(self, attrname) # get attribute of the target node elif not self._v_isopen: raise tables.ClosedNodeError('the node object is closed') elif self.is_dangling(): return None else: target_node = self.dereference() try: # __getattribute__() fails to get children of Groups return target_node.__getattribute__(attrname) except AttributeError: # some node classes (e.g. 
Array) don't implement __getattr__() return target_node.__getattr__(attrname) def __setattr__(self, attrname, value): # set attribute of the SoftLink itself if (attrname in SoftLink._link_attrnames or attrname[:3] in SoftLink._link_attrprefixes): object.__setattr__(self, attrname, value) # set attribute of the target node elif not self._v_isopen: raise tables.ClosedNodeError('the node object is closed') elif self.is_dangling(): raise ValueError("softlink target does not exist") else: self.dereference().__setattr__(attrname, value) def __getitem__(self, key): """__getitem__ must be defined in the SoftLink class in order for array indexing syntax to work""" if not self._v_isopen: raise tables.ClosedNodeError('the node object is closed') elif self.is_dangling(): raise ValueError("softlink target does not exist") else: return self.dereference().__getitem__(key) def __setitem__(self, key, value): """__setitem__ must be defined in the SoftLink class in order for array indexing syntax to work""" if not self._v_isopen: raise tables.ClosedNodeError('the node object is closed') elif self.is_dangling(): raise ValueError("softlink target does not exist") else: self.dereference().__setitem__(key, value) def is_dangling(self): return not (self.dereference() in self._v_file) def __str__(self): """Return a short string representation of the link. Examples -------- :: >>> f=tables.open_file('data/test.h5') >>> print(f.root.link0) /link0 (SoftLink) -> /path/to/node """ classname = self.__class__.__name__ target = str(self.target) # Check for relative pathnames if not self.target.startswith('/'): target = self._v_parent._g_join(self.target) if self._v_isopen: closed = "" else: closed = "closed " if target not in self._v_file: dangling = " (dangling)" else: dangling = "" return "%s%s (%s) -> %s%s" % (closed, self._v_pathname, classname, self.target, dangling)
class IndexArray(NotLoggedMixin, EArray, indexesextension.IndexArray):
    """Represent the index (sorted or reverse index) dataset in an HDF5 file.

    All NumPy typecodes are supported except for complex datatypes.

    Parameters
    ----------
    parentnode
        The Index object from which this object will hang.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An Atom object representing the type and shape of the atomic
        objects to be saved.  Only scalar atoms are supported.
    title
        Sets a TITLE attribute on the array entity.
    filters : Filters
        An instance of the Filters class that provides information about
        the desired I/O filters to be applied during the life of this
        object.
    byteorder
        The byteorder of the data on disk.

    """

    # Class identifier.
    _c_classid = 'INDEXARRAY'

    _c_classId = previous_api_property('_c_classid')

    # Properties
    # ~~~~~~~~~~
    chunksize = property(
        lambda self: self.chunkshape[1], None, None,
        """The chunksize for this object.""")

    slicesize = property(
        lambda self: self.shape[1], None, None,
        """The slicesize for this object.""")

    # Other methods
    # ~~~~~~~~~~~~~
    def __init__(self, parentnode, name,
                 atom=None, title="",
                 filters=None, byteorder=None):
        """Create an IndexArray instance."""

        self._v_pathname = parentnode._g_join(name)
        if atom is not None:
            # The shape and chunkshape need to be fixed here
            if name == "sorted":
                reduction = parentnode.reduction
                shape = (0, parentnode.slicesize // reduction)
                chunkshape = (1, parentnode.chunksize // reduction)
            else:
                shape = (0, parentnode.slicesize)
                chunkshape = (1, parentnode.chunksize)
        else:
            # The shape and chunkshape will be read from disk later on
            shape = None
            chunkshape = None

        super(IndexArray, self).__init__(
            parentnode, name, atom, shape, title, filters,
            chunkshape=chunkshape, byteorder=byteorder)

    # This version of searchBin uses both ranges (1st level) and
    # bounds (2nd level) caches.  It uses a cache for boundary rows,
    # but not for 'sorted' rows (this is only supported for the
    # 'optimized' types).
    def _search_bin(self, nrow, item):
        item1, item2 = item
        result1 = -1
        result2 = -1
        hi = self.shape[1]
        ranges = self._v_parent.rvcache
        boundscache = self.boundscache

        # First, look at the beginning of the slice
        begin = ranges[nrow, 0]
        # Look for items at the beginning of sorted slices
        if item1 <= begin:
            result1 = 0
        if item2 < begin:
            result2 = 0
        if result1 >= 0 and result2 >= 0:
            return (result1, result2)

        # Then, look for items at the end of the sorted slice
        end = ranges[nrow, 1]
        if result1 < 0:
            if item1 > end:
                result1 = hi
        if result2 < 0:
            if item2 >= end:
                result2 = hi
        if result1 >= 0 and result2 >= 0:
            return (result1, result2)

        # Finally, do a lookup for item1 and item2 if they were not found
        # Lookup in the middle of slice for item1
        chunksize = self.chunksize  # Number of elements/chunksize
        nchunk = -1

        # Try to get the bounds row from the LRU cache
        nslot = boundscache.getslot(nrow)
        if nslot >= 0:
            # Cache hit.  Use the row kept there.
            bounds = boundscache.getitem(nslot)
        else:
            # No luck with cached data.  Read the row and put it in the cache.
bounds = self._v_parent.bounds[nrow] size = bounds.size * bounds.itemsize boundscache.setitem(nrow, bounds, size) if result1 < 0: # Search the appropriate chunk in bounds cache nchunk = bisect_left(bounds, item1) chunk = self._read_sorted_slice(nrow, chunksize * nchunk, chunksize * (nchunk + 1)) result1 = self._bisect_left(chunk, item1, chunksize) result1 += chunksize * nchunk # Lookup in the middle of slice for item2 if result2 < 0: # Search the appropriate chunk in bounds cache nchunk2 = bisect_right(bounds, item2) if nchunk2 != nchunk: chunk = self._read_sorted_slice(nrow, chunksize * nchunk2, chunksize * (nchunk2 + 1)) result2 = self._bisect_right(chunk, item2, chunksize) result2 += chunksize * nchunk2 return (result1, result2) _searchBin = previous_api(_search_bin) def __str__(self): "A compact representation of this class" return "IndexArray(path=%s)" % self._v_pathname def __repr__(self): """A verbose representation of this class""" return """%s atom = %r shape = %s nrows = %s chunksize = %s slicesize = %s byteorder = %r""" % (self, self.atom, self.shape, self.nrows, self.chunksize, self.slicesize, self.byteorder)
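# The two-level lookup in ``_search_bin()`` above can be pictured with plain
# ``bisect`` on an in-memory sorted slice.  This is only an illustrative
# sketch of the algorithm (the chunk boundaries play the role of the
# ``bounds`` array; all numbers are made up):
#
#     from bisect import bisect_left
#
#     chunksize = 4
#     sorted_slice = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23]
#     # Last element of every chunk except the final one:
#     bounds = [sorted_slice[i]
#               for i in range(chunksize - 1, len(sorted_slice) - 1,
#                              chunksize)]
#     item = 10
#     nchunk = bisect_left(bounds, item)      # 1st level: pick a chunk
#     chunk = sorted_slice[chunksize * nchunk:chunksize * (nchunk + 1)]
#     result = chunksize * nchunk + bisect_left(chunk, item)  # 2nd level
#     assert sorted_slice[result] >= item     # first position >= item
#
# The real method reads only the selected chunk from disk and keeps the
# ``bounds`` rows in an LRU cache, so each lookup touches a single chunk.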
class ExternalLink(linkextension.ExternalLink, Link):
    """Represents an external link.

    An external link is a reference to a node in *another* file.
    Getting access to the pointed node (this action is called
    *dereferencing*) is done via the :meth:`__call__` special method
    (see below).

    .. rubric:: ExternalLink attributes

    .. attribute:: extfile

        The external file handler, if the link has been dereferenced.
        In case the link has not been dereferenced yet, its value is
        None.

    """

    # Class identifier.
    _c_classid = 'EXTERNALLINK'

    _c_classId = previous_api_property('_c_classid')

    def __init__(self, parentnode, name, target=None, _log=False):
        self.extfile = None
        """The external file handler, if the link has been dereferenced.
        In case the link has not been dereferenced yet, its value is
        None."""

        super(ExternalLink, self).__init__(parentnode, name, target, _log)

    def _get_filename_node(self):
        """Return the external filename and nodepath from `self.target`."""

        # This is needed for avoiding the 'C:\\file.h5' filepath notation
        filename, target = self.target.split(':/')
        return filename, '/' + target

    def __call__(self, **kwargs):
        """Dereference self.target and return the object.

        You can pass all the arguments supported by the :func:`open_file`
        function (except filename, of course) so as to open the referenced
        external file.

        Examples
        --------

        ::

            >>> f = tables.open_file('data1/test1.h5')
            >>> print(f.root.link2)
            /link2 (ExternalLink) -> data2/test2.h5:/path/to/node
            >>> plink2 = f.root.link2('a')  # open in 'a'ppend mode
            >>> print(plink2)
            /path/to/node (Group) ''
            >>> print(plink2._v_filename)
            'data2/test2.h5'        # belongs to referenced file

        """

        filename, target = self._get_filename_node()

        if not os.path.isabs(filename):
            # Resolve the external link with respect to this
            # file's directory.  See #306.
            base_directory = os.path.dirname(self._v_file.filename)
            filename = os.path.join(base_directory, filename)

        if self.extfile is None or not self.extfile.isopen:
            self.extfile = tables.open_file(filename, **kwargs)
        else:
            # XXX: implement better consistency checks
            assert self.extfile.filename == filename
            assert self.extfile.mode == kwargs.get('mode', 'r')

        return self.extfile._get_node(target)

    def umount(self):
        """Safely unmount self.extfile, if opened."""

        extfile = self.extfile
        # Close external file, if open
        if extfile is not None and extfile.isopen:
            extfile.close()
            self.extfile = None

    def _f_close(self):
        """Specific close for external links."""

        self.umount()
        super(ExternalLink, self)._f_close()

    def __str__(self):
        """Return a short string representation of the link.

        Examples
        --------

        ::

            >>> f = tables.open_file('data1/test1.h5')
            >>> print(f.root.link2)
            /link2 (ExternalLink) -> data2/test2.h5:/path/to/node

        """

        classname = self.__class__.__name__
        return "%s (%s) -> %s" % (self._v_pathname, classname, self.target)
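# A short usage sketch for ``ExternalLink`` dereferencing and ``umount()``
# (the file and node names are hypothetical):
#
#     import tables
#
#     f = tables.open_file('data1/test1.h5')
#     link = f.root.link2          # an ExternalLink
#     node = link()                # opens data2/test2.h5 on first access
#     print(link.extfile.isopen)   # -> True (1)
#     link.umount()                # closes the external file again
#     print(link.extfile)          # -> None
#     f.close()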