        redo_add_attr(file_, args[0], args[1])
    elif operation == 'DELATTR':
        redo_del_attr(file_, args[0], args[1])
    else:
        raise NotImplementedError("the requested unknown operation %r can "
                                  "not be redone; please report this to the "
                                  "authors" % operation)


def move_to_shadow(file_, path):
    node = file_._get_node(path)

    (shparent, shname) = file_._shadow_name()
    node._g_move(shparent, shname)

moveToShadow = previous_api(move_to_shadow)


def move_from_shadow(file_, path):
    (shparent, shname) = file_._shadow_name()
    node = shparent._f_get_child(shname)

    (pname, name) = split_path(path)
    parent = file_._get_node(pname)
    node._g_move(parent, name)

moveFromShadow = previous_api(move_from_shadow)


def undo_create(file_, path):
    move_to_shadow(file_, path)
        chunksize, slicesize = ccs_ultralight(optlevel, chunksize, slicesize)
    elif indsize == 2:  # light
        chunksize, slicesize = ccs_light(optlevel, chunksize, slicesize)
    elif indsize == 4:  # medium
        chunksize, slicesize = ccs_medium(optlevel, chunksize, slicesize)
    elif indsize == 8:  # full
        chunksize, slicesize = ccs_full(optlevel, chunksize, slicesize)

    # Finally, compute blocksize and superblocksize
    blocksize = computeblocksize(expectedrows, slicesize, chunksize)
    superblocksize = computeblocksize(expectedrows, blocksize, slicesize)
    # The size for different blocks information
    sizes = (superblocksize, blocksize, slicesize, chunksize)
    return sizes

calcChunksize = previous_api(calc_chunksize)


def ccs_ultralight(optlevel, chunksize, slicesize):
    """Correct the slicesize and the chunksize based on optlevel."""

    if optlevel in (0, 1, 2):
        slicesize //= 2
        slicesize += optlevel * slicesize
    elif optlevel in (3, 4, 5):
        slicesize *= optlevel - 1
    elif optlevel in (6, 7, 8):
        slicesize *= optlevel - 1
    elif optlevel == 9:
        slicesize *= optlevel - 1
    return chunksize, slicesize
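# Illustrative sketch (not part of the library): how ``ccs_ultralight``
# scales an initial slicesize with the optimization level.  The starting
# chunksize/slicesize values below are made up for demonstration only.
#
#   >>> ccs_ultralight(0, chunksize=1024, slicesize=200000)
#   (1024, 100000)
#   >>> ccs_ultralight(2, chunksize=1024, slicesize=200000)
#   (1024, 300000)
#   >>> ccs_ultralight(9, chunksize=1024, slicesize=200000)
#   (1024, 1600000)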
        # We need to use this strange way to obtain a dtype compliant
        # array because NumPy doesn't honor the shape of the dtype when
        # it is multidimensional.  See:
        # http://scipy.org/scipy/numpy/ticket/926
        # for details.
        # All of this is done just to take advantage of the NumPy
        # broadcasting rules.
        newshape = nparr.shape[:-len(atom.dtype.shape)]
        nparr2 = numpy.empty(newshape, dtype=[("", atom.dtype)])
        nparr2["f0"][:] = nparr
        # Return a view (i.e. get rid of the record type)
        nparr = nparr2.view(atom.dtype)
    return nparr

convertToNPAtom = previous_api(convert_to_np_atom)


# The next is used in Array, EArray and VLArray, and it is a bit more
# high level than convert_to_np_atom
def convert_to_np_atom2(object, atom):
    """Convert a generic object into a NumPy object compliant with atom."""

    # Check whether the object needs to be copied to make the operation
    # safe to in-place conversion.
    copy = atom.type in ["time64"]
    nparr = convert_to_np_atom(object, atom, copy)
    # Finally, check the byteorder and change it if needed
    byteorder = byteorders[nparr.dtype.byteorder]
    if byteorder in ["little", "big"] and byteorder != sys.byteorder:
        # The byteorder needs to be fixed (a copy is made
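# Illustrative sketch (not part of the library): the structured-dtype
# trick used above lets NumPy broadcast a plain array into a
# multidimensional atom dtype.  The dtype and values below are made up;
# ``numpy`` is assumed to be imported as in the surrounding module.
#
#   >>> import numpy
#   >>> atom_dtype = numpy.dtype((numpy.int32, (2,)))   # a (2,)-shaped atom
#   >>> nparr = numpy.arange(6).reshape(3, 2)
#   >>> nparr2 = numpy.empty(nparr.shape[:-len(atom_dtype.shape)],
#   ...                      dtype=[("", atom_dtype)])
#   >>> nparr2["f0"][:] = nparr                          # broadcast into atoms
#   >>> nparr2.view(atom_dtype).shape                    # drop the record type
#   (3, 2)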
print("[%s] %s" % (i, leaf[i])) if isinstance(leaf, Table) and options.colinfo: # Show info of columns for colname in leaf.colnames: print(repr(leaf.cols._f_col(colname))) if isinstance(leaf, Table) and options.idxinfo: # Show info of indexes for colname in leaf.colnames: col = leaf.cols._f_col(colname) if isinstance(col, Column) and col.index is not None: idx = col.index print(repr(idx)) dumpLeaf = previous_api(dump_leaf) def dump_group(pgroup, sort=False): node_kinds = pgroup._v_file._node_kinds[1:] what = pgroup._f_walk_groups() if sort: what = sorted(what, key=operator.attrgetter('_v_pathname')) for group in what: print(str(group)) if options.showattrs: print(" "+repr(group._v_attrs)) for kind in node_kinds: for node in group._f_list_nodes(kind): if options.verbose or options.dump: dump_leaf(node)
        if nparr.shape != slice_shape:
            # Create an array compliant with the specified shape
            narr = numpy.empty(shape=slice_shape, dtype=self.atom.dtype)
            # Assign the value to it
            try:
                narr[...] = nparr
            except Exception as exc:  # XXX
                raise ValueError("value parameter '%s' cannot be converted "
                                 "into an array object compliant with %s: "
                                 "'%r' The error was: <%s>" %
                                 (nparr, self.__class__.__name__, self, exc))
            return narr
        return nparr

    _checkShape = previous_api(_check_shape)

    def _read_slice(self, startl, stopl, stepl, shape):
        """Read a slice based on `startl`, `stopl` and `stepl`."""

        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._g_read_slice(startl, stopl, stepl, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    _readSlice = previous_api(_read_slice)


    """Get the node class matching the `classname`.

    If the name is not registered, a ``TypeError`` is raised.  The empty
    string and ``None`` are also accepted, and mean the ``Node`` class.

    .. versionadded:: 3.0

    """

    # The empty string is accepted for compatibility
    # with old default arguments.
    if classname is None or classname == "":
        classname = "Node"

    # Get the class object corresponding to `classname`.
    if classname not in class_name_dict:
        raise TypeError("there is no registered node class named ``%s``"
                        % (classname,))

    return class_name_dict[classname]

getClassByName = previous_api(get_class_by_name)


## Local Variables:
## mode: python
## py-indent-offset: 4
## tab-width: 4
## fill-column: 72
## End:
        # Close everything else (i.e. indices)
        closenodes(prefix,
                   [path for path in alivenodes],
                   lambda path: alivenodes[path])

        # Next, revive the dead nodes, close and delete them
        # so they are not placed in the limbo again.
        # These two steps ensure tables are closed *before* their indices.
        closenodes(
            prefix,
            [path for path in deadnodes if '/_i_' not in path],  # not indices
            lambda path: revivenode(path))
        # Close everything else (i.e. indices)
        closenodes(prefix,
                   [path for path in deadnodes],
                   lambda path: revivenode(path))

    _g_closeDescendents = previous_api(_g_close_descendents)

    def _g_close(self):
        """Close this (open) group."""

        # hdf5extension operations:
        #   Close HDF5 group.
        self._g_close_group()

        # Close myself as a node.
        super(Group, self)._f_close()

    def _f_close(self):
        """Close this group and all its descendents.

        This method has the behavior described in :meth:`Node._f_close`.
class AttributeSet(hdf5extension.AttributeSet, object): """Container for the HDF5 attributes of a Node. This class provides methods to create new HDF5 node attributes, and to get, rename or delete existing ones. Like in Group instances (see :ref:`GroupClassDescr`), AttributeSet instances make use of the *natural naming* convention, i.e. you can access the attributes on disk as if they were normal Python attributes of the AttributeSet instance. This offers the user a very convenient way to access HDF5 node attributes. However, for this reason and in order not to pollute the object namespace, one can not assign *normal* attributes to AttributeSet instances, and their members use names which start by special prefixes as happens with Group objects. .. rubric:: Notes on native and pickled attributes The values of most basic types are saved as HDF5 native data in the HDF5 file. This includes Python bool, int, float, complex and str (but not long nor unicode) values, as well as their NumPy scalar versions and homogeneous or *structured* NumPy arrays of them. When read, these values are always loaded as NumPy scalar or array objects, as needed. For that reason, attributes in native HDF5 files will be always mapped into NumPy objects. Specifically, a multidimensional attribute will be mapped into a multidimensional ndarray and a scalar will be mapped into a NumPy scalar object (for example, a scalar H5T_NATIVE_LLONG will be read and returned as a numpy.int64 scalar). However, other kinds of values are serialized using pickle, so you only will be able to correctly retrieve them using a Python-aware HDF5 library. Thus, if you want to save Python scalar values and make sure you are able to read them with generic HDF5 tools, you should make use of *scalar or homogeneous/structured array NumPy objects* (for example, numpy.int64(1) or numpy.array([1, 2, 3], dtype='int16')). One more advice: because of the various potential difficulties in restoring a Python object stored in an attribute, you may end up getting a pickle string where a Python object is expected. If this is the case, you may wish to run pickle.loads() on that string to get an idea of where things went wrong, as shown in this example:: >>> import os, tempfile >>> import tables >>> >>> class MyClass(object): ... foo = 'bar' ... >>> myObject = MyClass() # save object of custom class in HDF5 attr >>> h5fname = tempfile.mktemp(suffix='.h5') >>> h5f = tables.open_file(h5fname, 'w') >>> h5f.root._v_attrs.obj = myObject # store the object >>> print(h5f.root._v_attrs.obj.foo) # retrieve it bar >>> h5f.close() >>> >>> del MyClass, myObject # delete class of object and reopen file >>> h5f = tables.open_file(h5fname, 'r') >>> print(repr(h5f.root._v_attrs.obj)) 'ccopy_reg\\n_reconstructor... >>> import pickle # let's unpickle that to see what went wrong >>> pickle.loads(h5f.root._v_attrs.obj) Traceback (most recent call last): ... AttributeError: 'module' object has no attribute 'MyClass' >>> # So the problem was not in the stored object, ... # but in the *environment* where it was restored. ... h5f.close() >>> os.remove(h5fname) .. rubric:: Notes on AttributeSet methods Note that this class overrides the __getattr__(), __setattr__() and __delattr__() special methods. 
This allows you to read, assign or delete attributes on disk by just using the next constructs:: leaf.attrs.myattr = 'str attr' # set a string (native support) leaf.attrs.myattr2 = 3 # set an integer (native support) leaf.attrs.myattr3 = [3, (1, 2)] # a generic object (Pickled) attrib = leaf.attrs.myattr # get the attribute ``myattr`` del leaf.attrs.myattr # delete the attribute ``myattr`` In addition, the dictionary-like __getitem__(), __setitem__() and __delitem__() methods are available, so you may write things like this:: for name in node._v_attrs._f_list(): print("name: %s, value: %s" % (name, node._v_attrs[name])) Use whatever idiom you prefer to access the attributes. If an attribute is set on a target node that already has a large number of attributes, a PerformanceWarning will be issued. .. rubric:: AttributeSet attributes .. attribute:: _v_attrnames A list with all attribute names. .. attribute:: _v_attrnamessys A list with system attribute names. .. attribute:: _v_attrnamesuser A list with user attribute names. .. attribute:: _v_unimplemented A list of attribute names with unimplemented native HDF5 types. """ def _g_getnode(self): return self._v__nodefile._get_node(self._v__nodepath) _v_node = property( _g_getnode, None, None, "The :class:`Node` instance this attribute set is " "associated with.") def __init__(self, node): """Create the basic structures to keep the attribute information. Reads all the HDF5 attributes (if any) on disk for the node "node". Parameters ---------- node The parent node """ # Refuse to create an instance of an already closed node if not node._v_isopen: raise ClosedNodeError("the node for attribute set is closed") dict_ = self.__dict__ self._g_new(node) dict_["_v__nodefile"] = node._v_file dict_["_v__nodepath"] = node._v_pathname dict_["_v_attrnames"] = self._g_list_attr(node) # The list of unimplemented attribute names dict_["_v_unimplemented"] = [] # Get the file version format. This is an optimization # in order to avoid accessing it too much. try: format_version = node._v_file.format_version except AttributeError: parsed_version = None else: if format_version == 'unknown': parsed_version = None else: parsed_version = tuple(map(int, format_version.split('.'))) dict_["_v__format_version"] = parsed_version # Split the attribute list in system and user lists dict_["_v_attrnamessys"] = [] dict_["_v_attrnamesuser"] = [] for attr in self._v_attrnames: # put the attributes on the local dictionary to allow # tab-completion self.__getattr__(attr) if issysattrname(attr): self._v_attrnamessys.append(attr) else: self._v_attrnamesuser.append(attr) # Sort the attributes self._v_attrnames.sort() self._v_attrnamessys.sort() self._v_attrnamesuser.sort() def _g_update_node_location(self, node): """Updates the location information about the associated `node`.""" dict_ = self.__dict__ dict_['_v__nodefile'] = node._v_file dict_['_v__nodepath'] = node._v_pathname # hdf5extension operations: self._g_new(node) _g_updateNodeLocation = previous_api(_g_update_node_location) def _f_list(self, attrset='user'): """Get a list of attribute names. The attrset string selects the attribute set to be used. A 'user' value returns only user attributes (this is the default). A 'sys' value returns only system attributes. Finally, 'all' returns both system and user attributes. 
""" if attrset == "user": return self._v_attrnamesuser[:] elif attrset == "sys": return self._v_attrnamessys[:] elif attrset == "all": return self._v_attrnames[:] def __getattr__(self, name): """Get the attribute named "name".""" # If attribute does not exist, raise AttributeError if not name in self._v_attrnames: raise AttributeError("Attribute '%s' does not exist in node: " "'%s'" % (name, self._v__nodepath)) # Read the attribute from disk. This is an optimization to read # quickly system attributes that are _string_ values, but it # takes care of other types as well as for example NROWS for # Tables and EXTDIM for EArrays format_version = self._v__format_version value = self._g_getattr(self._v_node, name) # Check whether the value is pickled # Pickled values always seems to end with a "." maybe_pickled = ( isinstance(value, numpy.generic) and # NumPy scalar? value.dtype.type == numpy.bytes_ and # string type? value.itemsize > 0 and value.endswith(b'.')) if (maybe_pickled and value in [b"0", b"0."]): # Workaround for a bug in many versions of Python (starting # somewhere after Python 2.6.1). See ticket #253. retval = value elif (maybe_pickled and _field_fill_re.match(name) and format_version == (1, 5)): # This format was used during the first 1.2 releases, just # for string defaults. try: retval = cPickle.loads(value) retval = numpy.array(retval) except ImportError: retval = None # signal error avoiding exception elif maybe_pickled and name == 'FILTERS' and format_version < (2, 0): # This is a big hack, but we don't have other way to recognize # pickled filters of PyTables 1.x files. value = _old_filters_re.sub(_new_filters_sub, value, 1) retval = cPickle.loads(value) # pass unpickling errors through elif maybe_pickled: try: retval = cPickle.loads(value) # except cPickle.UnpicklingError: # It seems that pickle may raise other errors than UnpicklingError # Perhaps it would be better just an "except:" clause? # except (cPickle.UnpicklingError, ImportError): # Definitely (see SF bug #1254636) except: # ivb (2005-09-07): It is too hard to tell # whether the unpickling failed # because of the string not being a pickle one at all, # because of a malformed pickle string, # or because of some other problem in object reconstruction, # thus making inconvenient even the issuing of a warning here. # The documentation contains a note on this issue, # explaining how the user can tell where the problem was. retval = value # Additional check for allowing a workaround for #307 if isinstance(retval, unicode) and retval == u'': retval = numpy.array(retval)[()] elif name == 'FILTERS' and format_version >= (2, 0): retval = Filters._unpack(value) elif name == 'TITLE' and not isinstance(value, str): if sys.version_info[0] < 3: # unicode is OK for TITLE retval = value else: retval = value.decode('utf-8') elif (issysattrname(name) and isinstance(value, (bytes, unicode)) and not isinstance(value, str) and not _field_fill_re.match(name)): # system attributes should always be str if sys.version_info[0] < 3: retval = value.encode() else: # python 3, bytes and not "FIELD_[0-9]+_FILL" retval = value.decode('utf-8') else: retval = value # Put this value in local directory self.__dict__[name] = retval return retval def _g__setattr(self, name, value): """Set a PyTables attribute. Sets a (maybe new) PyTables attribute with the specified `name` and `value`. If the attribute already exists, it is simply replaced. It does not log the change. 
""" # Save this attribute to disk # (overwriting an existing one if needed) stvalue = value if issysattrname(name): if name in ["EXTDIM", "AUTO_INDEX", "DIRTY", "NODE_TYPE_VERSION"]: stvalue = numpy.array(value, dtype=numpy.int32) value = stvalue[()] elif name == "NROWS": stvalue = numpy.array(value, dtype=SizeType) value = stvalue[()] elif name == "FILTERS" and self._v__format_version >= (2, 0): stvalue = value._pack() # value will remain as a Filters instance here # Convert value from a Python scalar into a NumPy scalar # (only in case it has not been converted yet) # Fixes ticket #59 if (stvalue is value and type(value) in (bool, bytes, int, float, complex, unicode, numpy.unicode_)): # Additional check for allowing a workaround for #307 if isinstance(value, unicode) and len(value) == 0: stvalue = numpy.array(u'') else: stvalue = numpy.array(value) value = stvalue[()] self._g_setattr(self._v_node, name, stvalue) # New attribute or value. Introduce it into the local # directory self.__dict__[name] = value # Finally, add this attribute to the list if not present attrnames = self._v_attrnames if not name in attrnames: attrnames.append(name) attrnames.sort() if issysattrname(name): attrnamessys = self._v_attrnamessys attrnamessys.append(name) attrnamessys.sort() else: attrnamesuser = self._v_attrnamesuser attrnamesuser.append(name) attrnamesuser.sort() def __setattr__(self, name, value): """Set a PyTables attribute. Sets a (maybe new) PyTables attribute with the specified `name` and `value`. If the attribute already exists, it is simply replaced. A ``ValueError`` is raised when the name starts with a reserved prefix or contains a ``/``. A `NaturalNameWarning` is issued if the name is not a valid Python identifier. A `PerformanceWarning` is issued when the recommended maximum number of attributes in a node is going to be exceeded. """ nodefile = self._v__nodefile attrnames = self._v_attrnames # Check for name validity check_name_validity(name) nodefile._check_writable() # Check if there are too many attributes. max_node_attrs = nodefile.params['MAX_NODE_ATTRS'] if len(attrnames) >= max_node_attrs: warnings.warn( """\ node ``%s`` is exceeding the recommended maximum number of attributes (%d);\ be ready to see PyTables asking for *lots* of memory and possibly slow I/O""" % (self._v__nodepath, max_node_attrs), PerformanceWarning) undo_enabled = nodefile.is_undo_enabled() # Log old attribute removal (if any). if undo_enabled and (name in attrnames): self._g_del_and_log(name) # Set the attribute. self._g__setattr(name, value) # Log new attribute addition. if undo_enabled: self._g_log_add(name) def _g_log_add(self, name): self._v__nodefile._log('ADDATTR', self._v__nodepath, name) _g_logAdd = previous_api(_g_log_add) def _g_del_and_log(self, name): nodefile = self._v__nodefile node_pathname = self._v__nodepath # Log *before* moving to use the right shadow name. nodefile._log('DELATTR', node_pathname, name) attr_to_shadow(nodefile, node_pathname, name) _g_delAndLog = previous_api(_g_del_and_log) def _g__delattr(self, name): """Delete a PyTables attribute. Deletes the specified existing PyTables attribute. It does not log the change. 
""" # Delete the attribute from disk self._g_remove(self._v_node, name) # Delete the attribute from local lists self._v_attrnames.remove(name) if name in self._v_attrnamessys: self._v_attrnamessys.remove(name) else: self._v_attrnamesuser.remove(name) # Delete the attribute from the local directory # closes (#1049285) del self.__dict__[name] def __delattr__(self, name): """Delete a PyTables attribute. Deletes the specified existing PyTables attribute from the attribute set. If a nonexistent or system attribute is specified, an ``AttributeError`` is raised. """ nodefile = self._v__nodefile # Check if attribute exists if name not in self._v_attrnames: raise AttributeError( "Attribute ('%s') does not exist in node '%s'" % (name, self._v__nodepath)) nodefile._check_writable() # Remove the PyTables attribute or move it to shadow. if nodefile.is_undo_enabled(): self._g_del_and_log(name) else: self._g__delattr(name) def __getitem__(self, name): """The dictionary like interface for __getattr__().""" try: return self.__getattr__(name) except AttributeError: # Capture the AttributeError an re-raise a KeyError one raise KeyError("Attribute ('%s') does not exist in node '%s'" % (name, self._v__nodepath)) def __setitem__(self, name, value): """The dictionary like interface for __setattr__().""" self.__setattr__(name, value) def __delitem__(self, name): """The dictionary like interface for __delattr__().""" try: self.__delattr__(name) except AttributeError: # Capture the AttributeError an re-raise a KeyError one raise KeyError("Attribute ('%s') does not exist in node '%s'" % (name, self._v__nodepath)) def __contains__(self, name): """Is there an attribute with that name? A true value is returned if the attribute set has an attribute with the given name, false otherwise. """ return name in self._v_attrnames def _f_rename(self, oldattrname, newattrname): """Rename an attribute from oldattrname to newattrname.""" if oldattrname == newattrname: # Do nothing return # First, fetch the value of the oldattrname attrvalue = getattr(self, oldattrname) # Now, create the new attribute setattr(self, newattrname, attrvalue) # Finally, remove the old attribute delattr(self, oldattrname) def _g_copy(self, newset, set_attr=None, copyclass=False): """Copy set attributes. Copies all user and allowed system PyTables attributes to the given attribute set, replacing the existing ones. You can specify a *bound* method of the destination set that will be used to set its attributes. Else, its `_g__setattr` method will be used. Changes are logged depending on the chosen setting method. The default setting method does not log anything. .. versionchanged:: 3.0 The *newSet* parameter has been renamed into *newset*. .. versionchanged:: 3.0 The *copyClass* parameter has been renamed into *copyclass*. """ copysysattrs = newset._v__nodefile.params['PYTABLES_SYS_ATTRS'] if set_attr is None: set_attr = newset._g__setattr for attrname in self._v_attrnamesuser: # Do not copy the unimplemented attributes. if attrname not in self._v_unimplemented: set_attr(attrname, getattr(self, attrname)) # Copy the system attributes that we are allowed to. if copysysattrs: for attrname in self._v_attrnamessys: if ((attrname not in SYS_ATTRS_NOTTOBECOPIED) and # Do not copy the FIELD_ attributes in tables as this can # be really *slow* (don't know exactly the reason). # See #304. 
not attrname.startswith("FIELD_")): set_attr(attrname, getattr(self, attrname)) # Copy CLASS and VERSION attributes if requested if copyclass: for attrname in FORCE_COPY_CLASS: if attrname in self._v_attrnamessys: set_attr(attrname, getattr(self, attrname)) def _f_copy(self, where): """Copy attributes to the where node. Copies all user and certain system attributes to the given where node (a Node instance - see :ref:`NodeClassDescr`), replacing the existing ones. """ # AttributeSet must be defined in order to define a Node. # However, we need to know Node here. # Using class_name_dict avoids a circular import. if not isinstance(where, class_name_dict['Node']): raise TypeError("destination object is not a node: %r" % (where, )) self._g_copy(where._v_attrs, where._v_attrs.__setattr__) def _g_close(self): # Nothing will be done here, as the existing instance is completely # operative now. pass def __str__(self): """The string representation for this object.""" # The pathname pathname = self._v__nodepath # Get this class name classname = self.__class__.__name__ # The attribute names attrnumber = len([n for n in self._v_attrnames]) return "%s._v_attrs (%s), %s attributes" % \ (pathname, classname, attrnumber) def __repr__(self): """A detailed string representation for this object.""" # print additional info only if there are attributes to show attrnames = [n for n in self._v_attrnames] if len(attrnames): rep = [ '%s := %r' % (attr, getattr(self, attr)) for attr in attrnames ] attrlist = '[%s]' % (',\n '.join(rep)) return "%s:\n %s" % (str(self), attrlist) else: return str(self)
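# Illustrative sketch (not part of the library): renaming and copying
# attributes with the AttributeSet methods defined above.  The file and
# node names are made up for the example.
#
#   >>> import numpy, tables
#   >>> h5f = tables.open_file('attrs-demo.h5', 'w')
#   >>> attrs = h5f.root._v_attrs
#   >>> attrs.units = 'mm'                   # natural naming assignment
#   >>> attrs['scale'] = numpy.float64(0.5)  # dict-like assignment
#   >>> attrs._f_rename('units', 'length_units')
#   >>> sorted(attrs._f_list('user'))
#   ['length_units', 'scale']
#   >>> grp = h5f.create_group('/', 'copy_target')
#   >>> attrs._f_copy(grp)                   # copy user attrs to another node
#   >>> h5f.close()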
class ProxyDict(dict):
    """A dictionary which uses a container object to store its values."""

    containerRef = previous_api_property('containerref')

    def __init__(self, container):
        self.containerref = weakref.ref(container)
        """A weak reference to the container object.

        .. versionchanged:: 3.0
           The *containerRef* attribute has been renamed into
           *containerref*.

        """

    def __getitem__(self, key):
        if key not in self:
            raise KeyError(key)

        # Values are not actually stored to avoid extra references.
        return self._get_value_from_container(self._get_container(), key)

    def __setitem__(self, key, value):
        # Values are not actually stored to avoid extra references.
        super(ProxyDict, self).__setitem__(key, None)

    def __repr__(self):
        return object.__repr__(self)

    def __str__(self):
        # C implementation does not use `self.__getitem__()`. :(
        itemFormat = '%r: %r'
        itemReprs = [itemFormat % item for item in self.iteritems()]
        return '{%s}' % ', '.join(itemReprs)

    def values(self):
        # C implementation does not use `self.__getitem__()`. :(
        valueList = []
        for key in self.iterkeys():
            valueList.append(self[key])
        return valueList

    def itervalues(self):
        # C implementation does not use `self.__getitem__()`. :(
        for key in self.iterkeys():
            yield self[key]

    def items(self):
        # C implementation does not use `self.__getitem__()`. :(
        itemList = []
        for key in self.iterkeys():
            itemList.append((key, self[key]))
        return itemList

    def iteritems(self):
        # C implementation does not use `self.__getitem__()`. :(
        for key in self.iterkeys():
            yield (key, self[key])

    def _get_container(self):
        container = self.containerref()
        if container is None:
            raise ValueError("the container object no longer exists")
        return container

    _getContainer = previous_api(_get_container)
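# Illustrative sketch (not part of the library): a minimal ProxyDict
# subclass.  Only keys are stored in the dict itself; values are looked
# up in the referenced container on every access, so the proxy never
# keeps extra references to them.  All names below are hypothetical.
#
#   >>> class _ColumnProxy(ProxyDict):
#   ...     def _get_value_from_container(self, container, key):
#   ...         # delegate the actual lookup to the container object
#   ...         return container.fetch(key)
#   ...
#   >>> class _Container(object):
#   ...     def fetch(self, key):
#   ...         return key.upper()
#   ...
#   >>> container = _Container()
#   >>> proxy = _ColumnProxy(container)
#   >>> proxy['a'] = 'ignored'     # only the key is recorded
#   >>> proxy['a']
#   'A'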
    if isinstance(leaf, Table) and options.colinfo:
        # Show info of columns
        for colname in leaf.colnames:
            print(repr(leaf.cols._f_col(colname)))

    if isinstance(leaf, Table) and options.idxinfo:
        # Show info of indexes
        for colname in leaf.colnames:
            col = leaf.cols._f_col(colname)
            if isinstance(col, Column) and col.index is not None:
                idx = col.index
                print(repr(idx))

dumpLeaf = previous_api(dump_leaf)


def dump_group(pgroup):
    node_kinds = pgroup._v_file._node_kinds[1:]
    for group in pgroup._f_walk_groups():
        print(str(group))
        if options.showattrs:
            print(" " + repr(group._v_attrs))
        for kind in node_kinds:
            for node in group._f_list_nodes(kind):
                if options.verbose or options.dump:
                    dump_leaf(node)
                else:
                    print(str(node))
class IndexArray(NotLoggedMixin, EArray, indexesextension.IndexArray):
    """Represent the index (sorted or reverse index) dataset in HDF5 file.

    All NumPy typecodes are supported except for complex datatypes.

    Parameters
    ----------
    parentnode
        The Index class from which this object will hang off.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An Atom object representing the shape and type of the atomic
        objects to be saved.  Only scalar atoms are supported.
    title
        Sets a TITLE attribute on the array entity.
    filters : Filters
        An instance of the Filters class that provides information about
        the desired I/O filters to be applied during the life of this
        object.
    byteorder
        The byteorder of the data on-disk.

    """

    # Class identifier.
    _c_classid = 'INDEXARRAY'

    _c_classId = previous_api_property('_c_classid')

    # Properties
    # ~~~~~~~~~~
    chunksize = property(
        lambda self: self.chunkshape[1], None, None,
        """The chunksize for this object.""")

    slicesize = property(
        lambda self: self.shape[1], None, None,
        """The slicesize for this object.""")

    # Other methods
    # ~~~~~~~~~~~~~
    def __init__(self, parentnode, name,
                 atom=None, title="", filters=None, byteorder=None):
        """Create an IndexArray instance."""

        self._v_pathname = parentnode._g_join(name)
        if atom is not None:
            # The shape and chunkshape need to be fixed here
            if name == "sorted":
                reduction = parentnode.reduction
                shape = (0, parentnode.slicesize // reduction)
                chunkshape = (1, parentnode.chunksize // reduction)
            else:
                shape = (0, parentnode.slicesize)
                chunkshape = (1, parentnode.chunksize)
        else:
            # The shape and chunkshape will be read from disk later on
            shape = None
            chunkshape = None

        super(IndexArray, self).__init__(
            parentnode, name, atom, shape, title, filters,
            chunkshape=chunkshape, byteorder=byteorder)

    # This version of searchBin uses both ranges (1st level) and
    # bounds (2nd level) caches.  It uses a cache for boundary rows,
    # but not for 'sorted' rows (this is only supported for the
    # 'optimized' types).
    def _search_bin(self, nrow, item):
        item1, item2 = item
        result1 = -1
        result2 = -1
        hi = self.shape[1]
        ranges = self._v_parent.rvcache
        boundscache = self.boundscache
        # First, look at the beginning of the slice
        begin = ranges[nrow, 0]
        # Look for items at the beginning of sorted slices
        if item1 <= begin:
            result1 = 0
        if item2 < begin:
            result2 = 0
        if result1 >= 0 and result2 >= 0:
            return (result1, result2)
        # Then, look for items at the end of the sorted slice
        end = ranges[nrow, 1]
        if result1 < 0:
            if item1 > end:
                result1 = hi
        if result2 < 0:
            if item2 >= end:
                result2 = hi
        if result1 >= 0 and result2 >= 0:
            return (result1, result2)
        # Finally, do a lookup for item1 and item2 if they were not found
        # Lookup in the middle of slice for item1
        chunksize = self.chunksize  # Number of elements/chunksize
        nchunk = -1
        # Try to get the bounds row from the LRU cache
        nslot = boundscache.getslot(nrow)
        if nslot >= 0:
            # Cache hit.  Use the row kept there.
            bounds = boundscache.getitem(nslot)
        else:
            # No luck with cached data.  Read the row and put it in the cache.
            bounds = self._v_parent.bounds[nrow]
            size = bounds.size * bounds.itemsize
            boundscache.setitem(nrow, bounds, size)
        if result1 < 0:
            # Search the appropriate chunk in bounds cache
            nchunk = bisect_left(bounds, item1)
            chunk = self._read_sorted_slice(nrow, chunksize * nchunk,
                                            chunksize * (nchunk + 1))
            result1 = self._bisect_left(chunk, item1, chunksize)
            result1 += chunksize * nchunk
        # Lookup in the middle of slice for item2
        if result2 < 0:
            # Search the appropriate chunk in bounds cache
            nchunk2 = bisect_right(bounds, item2)
            if nchunk2 != nchunk:
                chunk = self._read_sorted_slice(nrow, chunksize * nchunk2,
                                                chunksize * (nchunk2 + 1))
            result2 = self._bisect_right(chunk, item2, chunksize)
            result2 += chunksize * nchunk2
        return (result1, result2)

    _searchBin = previous_api(_search_bin)

    def __str__(self):
        "A compact representation of this class"
        return "IndexArray(path=%s)" % self._v_pathname

    def __repr__(self):
        """A verbose representation of this class."""

        return """%s
  atom = %r
  shape = %s
  nrows = %s
  chunksize = %s
  slicesize = %s
  byteorder = %r""" % (self, self.atom, self.shape, self.nrows,
                       self.chunksize, self.slicesize, self.byteorder)
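# Illustrative sketch (not part of the library): the chunk-level lookup
# performed above.  For this sketch we assume the ``bounds`` row stores
# the first element of every chunk except the first, so ``bisect_left``
# selects the single chunk that has to be read and searched.  All
# numbers are made up.
#
#   >>> from bisect import bisect_left
#   >>> chunksize = 4
#   >>> sorted_slice = [1, 3, 5, 7,  9, 11, 13, 15,  17, 19, 21, 23]
#   >>> bounds = sorted_slice[chunksize::chunksize]    # [9, 17]
#   >>> nchunk = bisect_left(bounds, 12)               # item 12 -> chunk 1
#   >>> chunk = sorted_slice[chunksize * nchunk:chunksize * (nchunk + 1)]
#   >>> chunksize * nchunk + bisect_left(chunk, 12)    # position in the slice
#   6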
class Array(hdf5extension.Array, Leaf): """This class represents homogeneous datasets in an HDF5 file. This class provides methods to write or read data to or from array objects in the file. This class does not allow you neither to enlarge nor compress the datasets on disk; use the EArray class (see :ref:`EArrayClassDescr`) if you want enlargeable dataset support or compression features, or CArray (see :ref:`CArrayClassDescr`) if you just want compression. An interesting property of the Array class is that it remembers the *flavor* of the object that has been saved so that if you saved, for example, a list, you will get a list during readings afterwards; if you saved a NumPy array, you will get a NumPy object, and so forth. Note that this class inherits all the public attributes and methods that Leaf (see :ref:`LeafClassDescr`) already provides. However, as Array instances have no internal I/O buffers, it is not necessary to use the flush() method they inherit from Leaf in order to save their internal state to disk. When a writing method call returns, all the data is already on disk. Parameters ---------- parentnode The parent :class:`Group` object. .. versionchanged:: 3.0 Renamed from *parentNode* to *parentnode* name : str The name of this node in its parent group. obj The array or scalar to be saved. Accepted types are NumPy arrays and scalars as well as native Python sequences and scalars, provided that values are regular (i.e. they are not like ``[[1,2],2]``) and homogeneous (i.e. all the elements are of the same type). .. versionchanged:: 3.0 Renamed form *object* into *obj*. title A description for this node (it sets the ``TITLE`` HDF5 attribute on disk). byteorder The byteorder of the data *on disk*, specified as 'little' or 'big'. If this is not specified, the byteorder is that of the given `object`. """ # Class identifier. _c_classid = 'ARRAY' _c_classId = previous_api_property('_c_classid') _v_objectId = previous_api_property('_v_objectid') # Lazy read-only attributes # ````````````````````````` @lazyattr def dtype(self): """The NumPy ``dtype`` that most closely matches this array.""" return self.atom.dtype # Properties # ~~~~~~~~~~ def _getnrows(self): if self.shape == (): return SizeType(1) # scalar case else: return self.shape[self.maindim] nrows = property(_getnrows, None, None, "The number of rows in the array.") def _getrowsize(self): maindim = self.maindim rowsize = self.atom.size for i, dim in enumerate(self.shape): if i != maindim: rowsize *= dim return rowsize rowsize = property( _getrowsize, None, None, "The size of the rows in bytes in dimensions orthogonal to *maindim*.") size_in_memory = property( lambda self: self.nrows * self.rowsize, None, None, """The size of this array's data in bytes when it is fully loaded into memory.""") # Other methods # ~~~~~~~~~~~~~ def __init__(self, parentnode, name, obj=None, title="", byteorder=None, _log=True, _atom=None): self._v_version = None """The object version of this array.""" self._v_new = new = obj is not None """Is this the first time the node has been created?""" self._v_new_title = title """New title for this node.""" self._obj = obj """The object to be stored in the array. It can be any of numpy, list, tuple, string, integer of floating point types, provided that they are regular (i.e. they are not like ``[[1, 2], 2]``). .. versionchanged:: 3.0 Renamed form *_object* into *_obj*. """ self._v_convert = True """Whether the ``Array`` object must be converted or not.""" # Miscellaneous iteration rubbish. 
self._start = None """Starting row for the current iteration.""" self._stop = None """Stopping row for the current iteration.""" self._step = None """Step size for the current iteration.""" self._nrowsread = None """Number of rows read up to the current state of iteration.""" self._startb = None """Starting row for current buffer.""" self._stopb = None """Stopping row for current buffer. """ self._row = None """Current row in iterators (sentinel).""" self._init = False """Whether we are in the middle of an iteration or not (sentinel).""" self.listarr = None """Current buffer in iterators.""" # Documented (*public*) attributes. self.atom = _atom """An Atom (see :ref:`AtomClassDescr`) instance representing the *type* and *shape* of the atomic objects to be saved. """ self.shape = None """The shape of the stored array.""" self.nrow = None """On iterators, this is the index of the current row.""" self.extdim = -1 # ordinary arrays are not enlargeable """The index of the enlargeable dimension.""" # Ordinary arrays have no filters: leaf is created with default ones. super(Array, self).__init__(parentnode, name, new, Filters(), byteorder, _log) def _g_create(self): """Save a new array in file.""" self._v_version = obversion try: # `Leaf._g_post_init_hook()` should be setting the flavor on disk. self._flavor = flavor = flavor_of(self._obj) nparr = array_as_internal(self._obj, flavor) except: # XXX # Problems converting data. Close the node and re-raise exception. self.close(flush=0) raise # Raise an error in case of unsupported object if nparr.dtype.kind in ['V', 'U', 'O']: # in void, unicode, object raise TypeError("Array objects cannot currently deal with void, " "unicode or object arrays") # Decrease the number of references to the object self._obj = None # Fix the byteorder of data nparr = self._g_fix_byteorder_data(nparr, nparr.dtype.byteorder) # Create the array on-disk try: # ``self._v_objectid`` needs to be set because would be # needed for setting attributes in some descendants later # on (self._v_objectid, self.shape, self.atom) = self._create_array(nparr, self._v_new_title, self.atom) except: # XXX # Problems creating the Array on disk. Close node and re-raise. self.close(flush=0) raise # Compute the optimal buffer size self.nrowsinbuf = self._calc_nrowsinbuf() # Arrays don't have chunkshapes (so, set it to None) self._v_chunkshape = None return self._v_objectid def _g_open(self): """Get the metadata info for an array in file.""" (oid, self.atom, self.shape, self._v_chunkshape) = self._open_array() self.nrowsinbuf = self._calc_nrowsinbuf() return oid def get_enum(self): """Get the enumerated type associated with this array. If this array is of an enumerated type, the corresponding Enum instance (see :ref:`EnumClassDescr`) is returned. If it is not of an enumerated type, a TypeError is raised. """ if self.atom.kind != 'enum': raise TypeError("array ``%s`` is not of an enumerated type" % self._v_pathname) return self.atom.enum getEnum = previous_api(get_enum) def iterrows(self, start=None, stop=None, step=None): """Iterate over the rows of the array. This method returns an iterator yielding an object of the current flavor for each selected row in the array. The returned rows are taken from the *main dimension*. If a range is not supplied, *all the rows* in the array are iterated upon - you can also use the :meth:`Array.__iter__` special method for that purpose. If you only want to iterate over a given *range of rows* in the array, you may use the start, stop and step parameters. 
Examples -------- :: result = [row for row in arrayInstance.iterrows(step=4)] .. versionchanged:: 3.0 If the *start* parameter is provided and *stop* is None then the array is iterated from *start* to the last line. In PyTables < 3.0 only one element was returned. """ try: (self._start, self._stop, self._step) = self._process_range(start, stop, step) except IndexError: # If problems with indexes, silently return the null tuple return () self._init_loop() return self def __iter__(self): """Iterate over the rows of the array. This is equivalent to calling :meth:`Array.iterrows` with default arguments, i.e. it iterates over *all the rows* in the array. Examples -------- :: result = [row[2] for row in array] Which is equivalent to:: result = [row[2] for row in array.iterrows()] """ if not self._init: # If the iterator is called directly, assign default variables self._start = 0 self._stop = self.nrows self._step = 1 # and initialize the loop self._init_loop() return self def _init_loop(self): """Initialization for the __iter__ iterator.""" self._nrowsread = self._start self._startb = self._start self._row = -1 # Sentinel self._init = True # Sentinel self.nrow = SizeType(self._start - self._step) # row number _initLoop = previous_api(_init_loop) def next(self): """Get the next element of the array during an iteration. The element is returned as an object of the current flavor. """ # this could probably be sped up for long iterations by reusing the # listarr buffer if self._nrowsread >= self._stop: self._init = False self.listarr = None # fixes issue #308 raise StopIteration # end of iteration else: # Read a chunk of rows if self._row + 1 >= self.nrowsinbuf or self._row < 0: self._stopb = self._startb + self._step * self.nrowsinbuf # Protection for reading more elements than needed if self._stopb > self._stop: self._stopb = self._stop listarr = self._read(self._startb, self._stopb, self._step) # Swap the axes to easy the return of elements if self.extdim > 0: listarr = listarr.swapaxes(self.extdim, 0) self.listarr = internal_to_flavor(listarr, self.flavor) self._row = -1 self._startb = self._stopb self._row += 1 self.nrow += self._step self._nrowsread += self._step # Fixes bug #968132 # if self.listarr.shape: if self.shape: return self.listarr[self._row] else: return self.listarr # Scalar case def _interpret_indexing(self, keys): """Internal routine used by __getitem__ and __setitem__""" maxlen = len(self.shape) shape = (maxlen, ) startl = numpy.empty(shape=shape, dtype=SizeType) stopl = numpy.empty(shape=shape, dtype=SizeType) stepl = numpy.empty(shape=shape, dtype=SizeType) stop_None = numpy.zeros(shape=shape, dtype=SizeType) if not isinstance(keys, tuple): keys = (keys, ) nkeys = len(keys) dim = 0 # Here is some problem when dealing with [...,...] 
params # but this is a bit weird way to pass parameters anyway for key in keys: ellipsis = 0 # Sentinel if isinstance(key, type(Ellipsis)): ellipsis = 1 for diml in xrange(dim, len(self.shape) - (nkeys - dim) + 1): startl[dim] = 0 stopl[dim] = self.shape[diml] stepl[dim] = 1 dim += 1 elif dim >= maxlen: raise IndexError("Too many indices for object '%s'" % self._v_pathname) elif is_idx(key): key = operator.index(key) # Protection for index out of range if key >= self.shape[dim]: raise IndexError("Index out of range") if key < 0: # To support negative values (Fixes bug #968149) key += self.shape[dim] start, stop, step = self._process_range(key, key + 1, 1, dim=dim) stop_None[dim] = 1 elif isinstance(key, slice): start, stop, step = self._process_range(key.start, key.stop, key.step, dim=dim) else: raise TypeError("Non-valid index or slice: %s" % key) if not ellipsis: startl[dim] = start stopl[dim] = stop stepl[dim] = step dim += 1 # Complete the other dimensions, if needed if dim < len(self.shape): for diml in xrange(dim, len(self.shape)): startl[dim] = 0 stopl[dim] = self.shape[diml] stepl[dim] = 1 dim += 1 # Compute the shape for the container properly. Fixes #1288792 shape = [] for dim in xrange(len(self.shape)): # The negative division operates differently with python scalars # and numpy scalars (which are similar to C conventions). See: # http://www.python.org/doc/faq/programming.html#why-does-22-10-return-3 # and # http://www.peterbe.com/Integer-division-in-programming-languages # for more info on this issue. # I've finally decided to rely on the len(xrange) function. # F. Alted 2006-09-25 # Switch to `lrange` to allow long ranges (see #99). # use xrange, since it supports large integers as of Python 2.6 # see github #181 new_dim = len(xrange(startl[dim], stopl[dim], stepl[dim])) if not (new_dim == 1 and stop_None[dim]): shape.append(new_dim) return startl, stopl, stepl, shape def _fancy_selection(self, args): """Performs a NumPy-style fancy selection in `self`. Implements advanced NumPy-style selection operations in addition to the standard slice-and-int behavior. Indexing arguments may be ints, slices or lists of indices. Note: This is a backport from the h5py project. """ # Internal functions def validate_number(num, length): """Validate a list member for the given axis length.""" try: num = long(num) except TypeError: raise TypeError("Illegal index: %r" % num) if num > length - 1: raise IndexError("Index out of bounds: %d" % num) def expand_ellipsis(args, rank): """Expand ellipsis objects and fill in missing axes.""" n_el = sum(1 for arg in args if arg is Ellipsis) if n_el > 1: raise IndexError("Only one ellipsis may be used.") elif n_el == 0 and len(args) != rank: args = args + (Ellipsis, ) final_args = [] n_args = len(args) for idx, arg in enumerate(args): if arg is Ellipsis: final_args.extend((slice(None), ) * (rank - n_args + 1)) else: final_args.append(arg) if len(final_args) > rank: raise IndexError("Too many indices.") return final_args def translate_slice(exp, length): """Given a slice object, return a 3-tuple (start, count, step) This is for for use with the hyperslab selection routines. 
""" start, stop, step = exp.start, exp.stop, exp.step if start is None: start = 0 else: start = long(start) if stop is None: stop = length else: stop = long(stop) if step is None: step = 1 else: step = long(step) if step < 1: raise IndexError("Step must be >= 1 (got %d)" % step) if stop == start: raise IndexError("Zero-length selections are not allowed") if stop < start: raise IndexError("Reverse-order selections are not allowed") if start < 0: start = length + start if stop < 0: stop = length + stop if not 0 <= start <= (length - 1): raise IndexError("Start index %s out of range (0-%d)" % (start, length - 1)) if not 1 <= stop <= length: raise IndexError("Stop index %s out of range (1-%d)" % (stop, length)) count = (stop - start) // step if (stop - start) % step != 0: count += 1 if start + count > length: raise IndexError("Selection out of bounds (%d; axis has %d)" % (start + count, length)) return start, count, step # Main code for _fancy_selection mshape = [] selection = [] if not isinstance(args, tuple): args = (args, ) args = expand_ellipsis(args, len(self.shape)) list_seen = False reorder = None for idx, (exp, length) in enumerate(zip(args, self.shape)): if isinstance(exp, slice): start, count, step = translate_slice(exp, length) selection.append((start, count, step, idx, "AND")) mshape.append(count) else: try: exp = list(exp) except TypeError: exp = [exp] # Handle scalar index as a list of length 1 mshape.append(0) # Keep track of scalar index for NumPy else: mshape.append(len(exp)) if len(exp) == 0: raise IndexError( "Empty selections are not allowed (axis %d)" % idx) elif len(exp) > 1: if list_seen: raise IndexError("Only one selection list is allowed") else: list_seen = True else: if (not isinstance(exp[0], (int, long, numpy.integer)) or (isinstance(exp[0], numpy.ndarray) and not numpy.issubdtype(exp[0].dtype, numpy.integer))): raise TypeError("Only integer coordinates allowed.") nexp = numpy.asarray(exp, dtype="i8") # Convert negative values nexp = numpy.where(nexp < 0, length + nexp, nexp) # Check whether the list is ordered or not # (only one unordered list is allowed) if not len(nexp) == len(numpy.unique(nexp)): raise IndexError( "Selection lists cannot have repeated values") neworder = nexp.argsort() if (neworder.shape != (len(exp), ) or numpy.sum( numpy.abs(neworder - numpy.arange(len(exp)))) != 0): if reorder is not None: raise IndexError( "Only one selection list can be unordered") corrected_idx = sum(1 for x in mshape if x != 0) - 1 reorder = (corrected_idx, neworder) nexp = nexp[neworder] for select_idx in xrange(len(nexp) + 1): # This crazy piece of code performs a list selection # using HDF5 hyperslabs. # For each index, perform a "NOTB" selection on every # portion of *this axis* which falls *outside* the list # selection. For this to work, the input array MUST be # monotonically increasing. if select_idx < len(nexp): validate_number(nexp[select_idx], length) if select_idx == 0: start = 0 count = nexp[0] elif select_idx == len(nexp): start = nexp[-1] + 1 count = length - start else: start = nexp[select_idx - 1] + 1 count = nexp[select_idx] - start if count > 0: selection.append((start, count, 1, idx, "NOTB")) mshape = tuple(x for x in mshape if x != 0) return selection, reorder, mshape _fancySelection = previous_api(_fancy_selection) def __getitem__(self, key): """Get a row, a range of rows or a slice from the array. The set of tokens allowed for the key is the same as that for extended slicing in Python (including the Ellipsis or ... token). 
The result is an object of the current flavor; its shape depends on the kind of slice used as key and the shape of the array itself. Furthermore, NumPy-style fancy indexing, where a list of indices in a certain axis is specified, is also supported. Note that only one list per selection is supported right now. Finally, NumPy-style point and boolean selections are supported as well. Examples -------- :: array1 = array[4] # simple selection array2 = array[4:1000:2] # slice selection array3 = array[1, ..., ::2, 1:4, 4:] # general slice selection array4 = array[1, [1,5,10], ..., -1] # fancy selection array5 = array[np.where(array[:] > 4)] # point selection array6 = array[array[:] > 4] # boolean selection """ self._g_check_open() try: # First, try with a regular selection startl, stopl, stepl, shape = self._interpret_indexing(key) arr = self._read_slice(startl, stopl, stepl, shape) except TypeError: # Then, try with a point-wise selection try: coords = self._point_selection(key) arr = self._read_coords(coords) except TypeError: # Finally, try with a fancy selection selection, reorder, shape = self._fancy_selection(key) arr = self._read_selection(selection, reorder, shape) if self.flavor == "numpy" or not self._v_convert: return arr return internal_to_flavor(arr, self.flavor) def __setitem__(self, key, value): """Set a row, a range of rows or a slice in the array. It takes different actions depending on the type of the key parameter: if it is an integer, the corresponding array row is set to value (the value is broadcast when needed). If key is a slice, the row slice determined by it is set to value (as usual, if the slice to be updated exceeds the actual shape of the array, only the values in the existing range are updated). If value is a multidimensional object, then its shape must be compatible with the shape determined by key, otherwise, a ValueError will be raised. Furthermore, NumPy-style fancy indexing, where a list of indices in a certain axis is specified, is also supported. Note that only one list per selection is supported right now. Finally, NumPy-style point and boolean selections are supported as well. Examples -------- :: a1[0] = 333 # assign an integer to a Integer Array row a2[0] = 'b' # assign a string to a string Array row a3[1:4] = 5 # broadcast 5 to slice 1:4 a4[1:4:2] = 'xXx' # broadcast 'xXx' to slice 1:4:2 # General slice update (a5.shape = (4,3,2,8,5,10). 
a5[1, ..., ::2, 1:4, 4:] = numpy.arange(1728, shape=(4,3,2,4,3,6)) a6[1, [1,5,10], ..., -1] = arr # fancy selection a7[np.where(a6[:] > 4)] = 4 # point selection + broadcast a8[arr > 4] = arr2 # boolean selection """ self._g_check_open() # Create an array compliant with the specified slice nparr = convert_to_np_atom2(value, self.atom) if nparr.size == 0: return # truncate data if least_significant_digit filter is set # TODO: add the least_significant_digit attribute to the array on disk if (self.filters.least_significant_digit is not None and not numpy.issubdtype(nparr.dtype, int)): nparr = quantize(nparr, self.filters.least_significant_digit) try: startl, stopl, stepl, shape = self._interpret_indexing(key) self._write_slice(startl, stopl, stepl, shape, nparr) except TypeError: # Then, try with a point-wise selection try: coords = self._point_selection(key) self._write_coords(coords, nparr) except TypeError: selection, reorder, shape = self._fancy_selection(key) self._write_selection(selection, reorder, shape, nparr) def _check_shape(self, nparr, slice_shape): """Test that nparr shape is consistent with underlying object. If not, try creating a new nparr object, using broadcasting if necessary. """ if nparr.shape != (slice_shape + self.atom.dtype.shape): # Create an array compliant with the specified shape narr = numpy.empty(shape=slice_shape, dtype=self.atom.dtype) # Assign the value to it. It will raise a ValueError exception # if the objects cannot be broadcast to a single shape. narr[...] = nparr return narr else: return nparr _checkShape = previous_api(_check_shape) def _read_slice(self, startl, stopl, stepl, shape): """Read a slice based on `startl`, `stopl` and `stepl`.""" nparr = numpy.empty(dtype=self.atom.dtype, shape=shape) # Protection against reading empty arrays if 0 not in shape: # Arrays that have non-zero dimensionality self._g_read_slice(startl, stopl, stepl, nparr) # For zero-shaped arrays, return the scalar if nparr.shape == (): nparr = nparr[()] return nparr _readSlice = previous_api(_read_slice) def _read_coords(self, coords): """Read a set of points defined by `coords`.""" nparr = numpy.empty(dtype=self.atom.dtype, shape=len(coords)) if len(coords) > 0: self._g_read_coords(coords, nparr) # For zero-shaped arrays, return the scalar if nparr.shape == (): nparr = nparr[()] return nparr _readCoords = previous_api(_read_coords) def _read_selection(self, selection, reorder, shape): """Read a `selection`. Reorder if necessary. """ # Create the container for the slice nparr = numpy.empty(dtype=self.atom.dtype, shape=shape) # Arrays that have non-zero dimensionality self._g_read_selection(selection, nparr) # For zero-shaped arrays, return the scalar if nparr.shape == (): nparr = nparr[()] elif reorder is not None: # We need to reorder the array idx, neworder = reorder k = [slice(None)] * len(shape) k[idx] = neworder.argsort() # Apparently, a copy is not needed here, but doing it # for symmetry with the `_write_selection()` method. 
nparr = nparr[k].copy() return nparr _readSelection = previous_api(_read_selection) def _write_slice(self, startl, stopl, stepl, shape, nparr): """Write `nparr` in a slice based on `startl`, `stopl` and `stepl`.""" nparr = self._check_shape(nparr, tuple(shape)) countl = ((stopl - startl - 1) // stepl) + 1 self._g_write_slice(startl, stepl, countl, nparr) _writeSlice = previous_api(_write_slice) def _write_coords(self, coords, nparr): """Write `nparr` values in points defined by `coords` coordinates.""" if len(coords) > 0: nparr = self._check_shape(nparr, (len(coords), )) self._g_write_coords(coords, nparr) _writeCoords = previous_api(_write_coords) def _write_selection(self, selection, reorder, shape, nparr): """Write `nparr` in `selection`. Reorder if necessary. """ nparr = self._check_shape(nparr, tuple(shape)) # Check whether we should reorder the array if reorder is not None: idx, neworder = reorder k = [slice(None)] * len(shape) k[idx] = neworder # For a reason a don't understand well, we need a copy of # the reordered array nparr = nparr[k].copy() self._g_write_selection(selection, nparr) _writeSelection = previous_api(_write_selection) def _read(self, start, stop, step, out=None): """Read the array from disk without slice or flavor processing.""" nrowstoread = len(xrange(0, stop - start, step)) shape = list(self.shape) if shape: shape[self.maindim] = nrowstoread if out is None: arr = numpy.empty(dtype=self.atom.dtype, shape=shape) else: bytes_required = self.rowsize * nrowstoread # if buffer is too small, it will segfault if bytes_required != out.nbytes: raise ValueError( ('output array size invalid, got {0} bytes, ' 'need {1} bytes').format(out.nbytes, bytes_required)) if not out.flags['C_CONTIGUOUS']: raise ValueError('output array not C contiguous') arr = out # Protection against reading empty arrays if 0 not in shape: # Arrays that have non-zero dimensionality self._read_array(start, stop, step, arr) # data is always read in the system byteorder # if the out array's byteorder is different, do a byteswap if (out is not None and byteorders[arr.dtype.byteorder] != sys.byteorder): arr.byteswap(True) return arr def read(self, start=None, stop=None, step=None, out=None): """Get data in the array as an object of the current flavor. The start, stop and step parameters can be used to select only a *range of rows* in the array. Their meanings are the same as in the built-in range() Python function, except that negative values of step are not allowed yet. Moreover, if only start is specified, then stop will be set to start + 1. If you do not specify neither start nor stop, then *all the rows* in the array are selected. The out parameter may be used to specify a NumPy array to receive the output data. Note that the array must have the same size as the data selected with the other parameters. Note that the array's datatype is not checked and no type casting is performed, so if it does not match the datatype on disk, the output will not be correct. Also, this parameter is only valid when the array's flavor is set to 'numpy'. Otherwise, a TypeError will be raised. When data is read from disk in NumPy format, the output will be in the current system's byteorder, regardless of how it is stored on disk. The exception is when an output buffer is supplied, in which case the output will be in the byteorder of that output buffer. .. versionchanged:: 3.0 Added the *out* parameter. 
""" self._g_check_open() if out is not None and self.flavor != 'numpy': msg = ("Optional 'out' argument may only be supplied if array " "flavor is 'numpy', currently is {0}").format(self.flavor) raise TypeError(msg) (start, stop, step) = self._process_range_read(start, stop, step) arr = self._read(start, stop, step, out) return internal_to_flavor(arr, self.flavor) def _g_copy_with_stats(self, group, name, start, stop, step, title, filters, chunkshape, _log, **kwargs): """Private part of Leaf.copy() for each kind of leaf.""" # Compute the correct indices. (start, stop, step) = self._process_range_read(start, stop, step) # Get the slice of the array # (non-buffered version) if self.shape: arr = self[start:stop:step] else: arr = self[()] # Build the new Array object. Use the _atom reserved keyword # just in case the array is being copied from a native HDF5 # with atomic types different from scalars. # For details, see #275 of trac. object_ = Array(group, name, arr, title=title, _log=_log, _atom=self.atom) nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size return (object_, nbytes) _g_copyWithStats = previous_api(_g_copy_with_stats) def __repr__(self): """This provides more metainfo in addition to standard __str__""" return """%s atom := %r maindim := %r flavor := %r byteorder := %r chunkshape := %r""" % (self, self.atom, self.maindim, self.flavor, self.byteorder, self.chunkshape)
class Expr(object): """A class for evaluating expressions with arbitrary array-like objects. Expr is a class for evaluating expressions containing array-like objects. With it, you can evaluate expressions (like "3 * a + 4 * b") that operate on arbitrary large arrays while optimizing the resources required to perform them (basically main memory and CPU cache memory). It is similar to the Numexpr package (see :ref:`[NUMEXPR] <NUMEXPR>`), but in addition to NumPy objects, it also accepts disk-based homogeneous arrays, like the Array, CArray, EArray and Column PyTables objects. All the internal computations are performed via the Numexpr package, so all the broadcast and upcasting rules of Numexpr applies here too. These rules are very similar to the NumPy ones, but with some exceptions due to the particularities of having to deal with potentially very large disk-based arrays. Be sure to read the documentation of the Expr constructor and methods as well as that of Numexpr, if you want to fully grasp these particularities. Parameters ---------- expr : str This specifies the expression to be evaluated, such as "2 * a + 3 * b". uservars : dict This can be used to define the variable names appearing in *expr*. This mapping should consist of identifier-like strings pointing to any `Array`, `CArray`, `EArray`, `Column` or NumPy ndarray instances (or even others which will tried to be converted to ndarrays). When `uservars` is not provided or `None`, the current local and global namespace is sought instead of `uservars`. It is also possible to pass just some of the variables in expression via the `uservars` mapping, and the rest will be retrieved from the current local and global namespaces. kwargs : dict This is meant to pass additional parameters to the Numexpr kernel. This is basically the same as the kwargs argument in Numexpr.evaluate(), and is mainly meant for advanced use. Examples -------- The following shows an example of using Expr. >>> a = f.create_array('/', 'a', np.array([1,2,3])) >>> b = f.create_array('/', 'b', np.array([3,4,5])) >>> c = np.array([4,5,6]) >>> expr = tb.Expr("2 * a + b * c") # initialize the expression >>> expr.eval() # evaluate it array([14, 24, 36]) >>> sum(expr) # use as an iterator 74 where you can see that you can mix different containers in the expression (whenever shapes are consistent). You can also work with multidimensional arrays:: >>> a2 = f.create_array('/', 'a2', np.array([[1,2],[3,4]])) >>> b2 = f.create_array('/', 'b2', np.array([[3,4],[5,6]])) >>> c2 = np.array([4,5]) # This will be broadcasted >>> expr = tb.Expr("2 * a2 + b2-c2") >>> expr.eval() array([[1, 3], [7, 9]]) >>> sum(expr) array([ 8, 12]) .. rubric:: Expr attributes .. attribute:: append_mode The append mode for user-provided output containers. .. attribute:: maindim Common main dimension for inputs in expression. .. attribute:: names The names of variables in expression (list). .. attribute:: out The user-provided container (if any) for the expression outcome. .. attribute:: o_start The start range selection for the user-provided output. .. attribute:: o_stop The stop range selection for the user-provided output. .. attribute:: o_step The step range selection for the user-provided output. .. attribute:: shape Common shape for the arrays in expression. .. attribute:: values The values of variables in expression (list). """ _exprvars_cache = {} """Cache of variables participating in expressions. .. 
versionadded:: 3.0 """ def __init__(self, expr, uservars=None, **kwargs): self.append_mode = False """The append mode for user-provided output containers.""" self.maindim = 0 """Common main dimension for inputs in expression.""" self.names = [] """The names of variables in expression (list).""" self.out = None """The user-provided container (if any) for the expression outcome.""" self.o_start = None """The start range selection for the user-provided output.""" self.o_stop = None """The stop range selection for the user-provided output.""" self.o_step = None """The step range selection for the user-provided output.""" self.shape = None """Common shape for the arrays in expression.""" self.start, self.stop, self.step = (None, ) * 3 self.start = None """The start range selection for the input.""" self.stop = None """The stop range selection for the input.""" self.step = None """The step range selection for the input.""" self.values = [] """The values of variables in expression (list).""" self._compiled_expr = None """The compiled expression.""" self._single_row_out = None """A sample of the output with just a single row.""" # First, get the signature for the arrays in expression vars_ = self._required_expr_vars(expr, uservars) context = getContext(kwargs) self.names, _ = getExprNames(expr, context) # Raise a ValueError in case we have unsupported objects for name, var in vars_.iteritems(): if type(var) in (int, long, float, str): continue if not isinstance(var, (tb.Leaf, tb.Column)): if hasattr(var, "dtype"): # Quacks like a NumPy object continue raise TypeError("Unsupported variable type: %r" % var) objname = var.__class__.__name__ if objname not in ("Array", "CArray", "EArray", "Column"): raise TypeError("Unsupported variable type: %r" % var) # NumPy arrays to be copied? (we don't need to worry about # PyTables objects, as the reads always return contiguous and # aligned objects, or at least I think so). for name, var in vars_.iteritems(): if isinstance(var, np.ndarray): # See numexpr.necompiler.evaluate for a rational # of the code below if not var.flags.aligned: if var.ndim != 1: # Do a copy of this variable var = var.copy() # Update the vars_ dictionary vars_[name] = var # Get the variables and types values = self.values types_ = [] for name in self.names: value = vars_[name] if hasattr(value, 'atom'): types_.append(value.atom) elif hasattr(value, 'dtype'): types_.append(value) else: # try to convert into a NumPy array value = np.array(value) types_.append(value) values.append(value) # Create a signature for the expression signature = [(name, getType(type_)) for (name, type_) in zip(self.names, types_)] # Compile the expression self._compiled_expr = NumExpr(expr, signature, **kwargs) # Guess the shape for the outcome and the maindim of inputs self.shape, self.maindim = self._guess_shape() # The next method is similar to their counterpart in `Table`, but # adapted to the `Expr` own requirements. def _required_expr_vars(self, expression, uservars, depth=2): """Get the variables required by the `expression`. A new dictionary defining the variables used in the `expression` is returned. Required variables are first looked up in the `uservars` mapping, then in the set of top-level columns of the table. Unknown variables cause a `NameError` to be raised. When `uservars` is `None`, the local and global namespace where the API callable which uses this method is called is sought instead. To disable this mechanism, just specify a mapping as `uservars`. 
Nested columns and variables with an ``uint64`` type are not allowed (`TypeError` and `NotImplementedError` are raised, respectively). `depth` specifies the depth of the frame in order to reach local or global variables. """ # Get the names of variables used in the expression. exprvars_cache = self._exprvars_cache if not expression in exprvars_cache: # Protection against growing the cache too much if len(exprvars_cache) > 256: # Remove 10 (arbitrary) elements from the cache for k in exprvars_cache.keys()[:10]: del exprvars_cache[k] cexpr = compile(expression, '<string>', 'eval') exprvars = [ var for var in cexpr.co_names if var not in ['None', 'False', 'True'] and var not in numexpr_functions ] exprvars_cache[expression] = exprvars else: exprvars = exprvars_cache[expression] # Get the local and global variable mappings of the user frame # if no mapping has been explicitly given for user variables. user_locals, user_globals = {}, {} if uservars is None: user_frame = sys._getframe(depth) user_locals = user_frame.f_locals user_globals = user_frame.f_globals # Look for the required variables first among the ones # explicitly provided by the user. reqvars = {} for var in exprvars: # Get the value. if uservars is not None and var in uservars: val = uservars[var] elif uservars is None and var in user_locals: val = user_locals[var] elif uservars is None and var in user_globals: val = user_globals[var] else: raise NameError("name ``%s`` is not defined" % var) # Check the value. if hasattr(val, 'dtype') and val.dtype.str[1:] == 'u8': raise NotImplementedError( "variable ``%s`` refers to " "a 64-bit unsigned integer object, that is " "not yet supported in expressions, sorry; " % var) elif hasattr(val, '_v_colpathnames'): # nested column # This branch is never reached because the compile step # above already raise a ``TypeError`` for nested # columns, but that could change in the future. So it # is best to let this here. raise TypeError("variable ``%s`` refers to a nested column, " "not allowed in expressions" % var) reqvars[var] = val return reqvars _requiredExprVars = previous_api(_required_expr_vars) def set_inputs_range(self, start=None, stop=None, step=None): """Define a range for all inputs in expression. The computation will only take place for the range defined by the start, stop and step parameters in the main dimension of inputs (or the leading one, if the object lacks the concept of main dimension, like a NumPy container). If not a common main dimension exists for all inputs, the leading dimension will be used instead. """ self.start = start self.stop = stop self.step = step setInputsRange = previous_api(set_inputs_range) def set_output(self, out, append_mode=False): """Set out as container for output as well as the append_mode. The out must be a container that is meant to keep the outcome of the expression. It should be an homogeneous type container and can typically be an Array, CArray, EArray, Column or a NumPy ndarray. The append_mode specifies the way of which the output is filled. If true, the rows of the outcome are *appended* to the out container. Of course, for doing this it is necessary that out would have an append() method (like an EArray, for example). If append_mode is false, the output is set via the __setitem__() method (see the Expr.set_output_range() for info on how to select the rows to be updated). If out is smaller than what is required by the expression, only the computations that are needed to fill up the container are carried out. 
        If it is larger, the excess elements are unaffected.

        """

        if not (hasattr(out, "shape") and hasattr(out, "__setitem__")):
            raise ValueError(
                "You need to pass a settable multidimensional container "
                "as output")
        self.out = out
        if append_mode and not hasattr(out, "append"):
            raise ValueError(
                "For activating the ``append`` mode, you need a container "
                "with an `append()` method (like the `EArray`)")
        self.append_mode = append_mode

    setOutput = previous_api(set_output)

    def set_output_range(self, start=None, stop=None, step=None):
        """Define a range for user-provided output object.

        The output object will only be modified in the range specified by the
        start, stop and step parameters in the main dimension of output (or
        the leading one, if the object does not have the concept of main
        dimension, like a NumPy container).

        """

        if self.out is None:
            raise IndexError(
                "You need to pass an output object to `set_output()` first")
        self.o_start = start
        self.o_stop = stop
        self.o_step = step

    setOutputRange = previous_api(set_output_range)

    # Although the next code is similar to the method in `Leaf`, it
    # allows the use of pure NumPy objects.
    def _calc_nrowsinbuf(self, object_):
        """Calculate the number of rows that will fit in a buffer."""

        # Compute the rowsize for the *leading* dimension
        shape_ = list(object_.shape)
        if shape_:
            shape_[0] = 1
        rowsize = np.prod(shape_) * object_.dtype.itemsize
        # Compute the nrowsinbuf
        # Multiplying the I/O buffer size by 4 gives optimal results
        # in my benchmarks with `tables.Expr` (see ``bench/poly.py``)
        buffersize = IO_BUFFER_SIZE * 4
        nrowsinbuf = buffersize // rowsize
        # Safeguard against row sizes being extremely large
        if nrowsinbuf == 0:
            nrowsinbuf = 1
        # If rowsize is too large, issue a Performance warning
        maxrowsize = BUFFER_TIMES * buffersize
        if rowsize > maxrowsize:
            warnings.warn(
                """\
The object ``%s`` is exceeding the maximum recommended rowsize (%d bytes);
be ready to see PyTables asking for *lots* of memory and possibly slow I/O.
You may want to reduce the rowsize by trimming the value of dimensions
that are orthogonal (and preferably close) to the *leading* dimension of
this object.""" % (object_, maxrowsize), PerformanceWarning)
        return nrowsinbuf

    def _guess_shape(self):
        """Guess the shape of the output of the expression."""

        # First, compute the maximum dimension of inputs and maindim
        # (if it exists)
        maxndim = 0
        maindims = []
        for val in self.values:
            # Keep track of the maximum number of dimensions
            if len(val.shape) > maxndim:
                maxndim = len(val.shape)
            if hasattr(val, "maindim"):
                maindims.append(val.maindim)
        if maxndim == 0:
            self._single_row_out = out = self._compiled_expr(*self.values)
            return (), None
        if maindims and [maindims[0]] * len(maindims) == maindims:
            # If all maindims detected are the same, use this as maindim
            maindim = maindims[0]
        else:
            # If not, the main dimension will be the default one
            maindim = 0

        # The slices parameter for inputs
        slices = (slice(None), ) * maindim + (0, )
        # Now, collect the values in first row of arrays with maximum dims
        vals = []
        lens = []
        for val in self.values:
            shape = val.shape
            # Warning: don't use len(val) below or it will raise an
            # `Overflow` error on 32-bit platforms for large enough arrays.
if shape != () and shape[maindim] == 0: vals.append(val[:]) lens.append(0) elif len(shape) < maxndim: vals.append(val) else: vals.append(val.__getitem__(slices)) lens.append(shape[maindim]) minlen = min(lens) self._single_row_out = out = self._compiled_expr(*vals) shape = list(out.shape) if minlen > 0: shape.insert(maindim, minlen) return shape, maindim def _get_info(self, shape, maindim, itermode=False): """Return various info needed for evaluating the computation loop.""" # Compute the shape of the resulting container having # in account new possible values of start, stop and step in # the inputs range if maindim is not None: (start, stop, step) = get_indices(self.start, self.stop, self.step, shape[maindim]) shape[maindim] = min(shape[maindim], len(xrange(start, stop, step))) i_nrows = shape[maindim] else: start, stop, step = 0, 0, None i_nrows = 0 if not itermode: # Create a container for output if not defined yet o_maindim = 0 # Default maindim if self.out is None: out = np.empty(shape, dtype=self._single_row_out.dtype) # Get the trivial values for start, stop and step if maindim is not None: (o_start, o_stop, o_step) = (0, shape[maindim], 1) else: (o_start, o_stop, o_step) = (0, 0, 1) else: out = self.out # Out container already provided. Do some sanity checks. if hasattr(out, "maindim"): o_maindim = out.maindim # Refine the shape of the resulting container having in # account new possible values of start, stop and step in # the output range o_shape = list(out.shape) (o_start, o_stop, o_step) = get_indices(self.o_start, self.o_stop, self.o_step, o_shape[o_maindim]) o_shape[o_maindim] = min(o_shape[o_maindim], len(xrange(o_start, o_stop, o_step))) # Check that the shape of output is consistent with inputs tr_oshape = list(o_shape) # this implies a copy olen_ = tr_oshape.pop(o_maindim) tr_shape = list(shape) # do a copy if maindim is not None: len_ = tr_shape.pop(o_maindim) else: len_ = 1 if tr_oshape != tr_shape: raise ValueError( "Shape for out container does not match expression") # Force the input length to fit in `out` if not self.append_mode and olen_ < len_: shape[o_maindim] = olen_ stop = start + olen_ # Get the positions of inputs that should be sliced (the others # will be broadcasted) ndim = len(shape) slice_pos = [ i for i, val in enumerate(self.values) if len(val.shape) == ndim ] # The size of the I/O buffer nrowsinbuf = 1 for i, val in enumerate(self.values): # Skip scalar values in variables if i in slice_pos: nrows = self._calc_nrowsinbuf(val) if nrows > nrowsinbuf: nrowsinbuf = nrows if not itermode: return (i_nrows, slice_pos, start, stop, step, nrowsinbuf, out, o_maindim, o_start, o_stop, o_step) else: # For itermode, we don't need the out info return (i_nrows, slice_pos, start, stop, step, nrowsinbuf) def eval(self): """Evaluate the expression and return the outcome. Because of performance reasons, the computation order tries to go along the common main dimension of all inputs. If not such a common main dimension is found, the iteration will go along the leading dimension instead. For non-consistent shapes in inputs (i.e. shapes having a different number of dimensions), the regular NumPy broadcast rules applies. There is one exception to this rule though: when the dimensions orthogonal to the main dimension of the expression are consistent, but the main dimension itself differs among the inputs, then the shortest one is chosen for doing the computations. This is so because trying to expand very large on-disk arrays could be too expensive or simply not possible. 
        Also, the regular Numexpr casting rules (which are similar to those
        of NumPy, although you should check the Numexpr manual for the
        exceptions) are applied to determine the output type.

        Finally, if the set_output() method specifying a user container has
        already been called, the output is sent to this user-provided
        container.  If not, a fresh NumPy container is returned instead.

        .. warning::

            When dealing with large on-disk inputs, failing to specify an
            on-disk container may consume all your available memory.

        """

        values, shape, maindim = self.values, self.shape, self.maindim

        # Get different info we need for the main computation loop
        (i_nrows, slice_pos, start, stop, step, nrowsinbuf,
         out, o_maindim, o_start, o_stop, o_step) = \
            self._get_info(shape, maindim)

        if i_nrows == 0:
            # No elements to compute
            return self._single_row_out

        # Create a key that selects every element in inputs and output
        # (including the main dimension)
        i_slices = [slice(None)] * (maindim + 1)
        o_slices = [slice(None)] * (o_maindim + 1)

        # This is a hack to prevent doing unnecessary flavor conversions
        # while reading buffers
        for val in values:
            if hasattr(val, 'maindim'):
                val._v_convert = False

        # Start the computation itself
        for start2 in xrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice for inputs
            i_slices[maindim] = slice(start2, stop2, step)
            # Get the input values
            vals = []
            for i, val in enumerate(values):
                if i in slice_pos:
                    vals.append(val.__getitem__(tuple(i_slices)))
                else:
                    # An explicit read of values is apparently not needed,
                    # as PyTables leaves seem to work just fine inside Numexpr
                    vals.append(val)
            # Do the actual computation for this slice
            rout = self._compiled_expr(*vals)
            # Set the values into the out buffer
            if self.append_mode:
                out.append(rout)
            else:
                # Compute the slice to be filled in output
                start3 = o_start + (start2 - start) // step
                stop3 = start3 + nrowsinbuf * o_step
                if stop3 > o_stop:
                    stop3 = o_stop
                o_slices[o_maindim] = slice(start3, stop3, o_step)
                # Set the slice
                out[tuple(o_slices)] = rout

        # Activate the conversion again (default)
        for val in values:
            if hasattr(val, 'maindim'):
                val._v_convert = True

        return out

    def __iter__(self):
        """Iterate over the rows of the outcome of the expression.

        This iterator always returns rows as NumPy objects, so a possible out
        container specified in :meth:`Expr.set_output` method is ignored
        here.
""" values, shape, maindim = self.values, self.shape, self.maindim # Get different info we need for the main computation loop (i_nrows, slice_pos, start, stop, step, nrowsinbuf) = \ self._get_info(shape, maindim, itermode=True) if i_nrows == 0: # No elements to compute return # Create a key that selects every element in inputs # (including the main dimension) i_slices = [slice(None)] * (maindim + 1) # This is a hack to prevent doing unnecessary flavor conversions # while reading buffers for val in values: if hasattr(val, 'maindim'): val._v_convert = False # Start the computation itself for start2 in xrange(start, stop, step * nrowsinbuf): stop2 = start2 + step * nrowsinbuf if stop2 > stop: stop2 = stop # Set the proper slice in the main dimension i_slices[maindim] = slice(start2, stop2, step) # Get the values for computing the buffer vals = [] for i, val in enumerate(values): if i in slice_pos: vals.append(val.__getitem__(tuple(i_slices))) else: # A read of values is not apparently needed, as PyTables # leaves seems to work just fine inside Numexpr vals.append(val) # Do the actual computation rout = self._compiled_expr(*vals) # Return one row per call for row in rout: yield row # Activate the conversion again (default) for val in values: if hasattr(val, 'maindim'): val._v_convert = True
class EArray(CArray): """This class represents extendable, homogeneous datasets in an HDF5 file. The main difference between an EArray and a CArray (see :ref:`CArrayClassDescr`), from which it inherits, is that the former can be enlarged along one of its dimensions, the *enlargeable dimension*. That means that the :attr:`Leaf.extdim` attribute (see :class:`Leaf`) of any EArray instance will always be non-negative. Multiple enlargeable dimensions might be supported in the future. New rows can be added to the end of an enlargeable array by using the :meth:`EArray.append` method. Parameters ---------- parentnode The parent :class:`Group` object. .. versionchanged:: 3.0 Renamed from *parentNode* to *parentnode*. name : str The name of this node in its parent group. atom An `Atom` instance representing the *type* and *shape* of the atomic objects to be saved. shape The shape of the new array. One (and only one) of the shape dimensions *must* be 0. The dimension being 0 means that the resulting `EArray` object can be extended along it. Multiple enlargeable dimensions are not supported right now. title A description for this node (it sets the ``TITLE`` HDF5 attribute on disk). filters An instance of the `Filters` class that provides information about the desired I/O filters to be applied during the life of this object. expectedrows A user estimate about the number of row elements that will be added to the growable dimension in the `EArray` node. If not provided, the default value is ``EXPECTED_ROWS_EARRAY`` (see ``tables/parameters.py``). If you plan to create either a much smaller or a much bigger `EArray` try providing a guess; this will optimize the HDF5 B-Tree creation and management process time and the amount of memory used. chunkshape The shape of the data chunk to be read or written in a single HDF5 I/O operation. Filters are applied to those chunks of data. The dimensionality of `chunkshape` must be the same as that of `shape` (beware: no dimension should be 0 this time!). If ``None``, a sensible value is calculated based on the `expectedrows` parameter (which is recommended). byteorder The byteorder of the data *on disk*, specified as 'little' or 'big'. If this is not specified, the byteorder is that of the platform. Examples -------- See below a small example of the use of the `EArray` class. The code is available in ``examples/earray1.py``:: import tables import numpy fileh = tables.open_file('earray1.h5', mode='w') a = tables.StringAtom(itemsize=8) # Use ``a`` as the object type for the enlargeable array. array_c = fileh.create_earray(fileh.root, 'array_c', a, (0,), \"Chars\") array_c.append(numpy.array(['a'*2, 'b'*4], dtype='S8')) array_c.append(numpy.array(['a'*6, 'b'*8, 'c'*10], dtype='S8')) # Read the string ``EArray`` we have created on disk. for s in array_c: print('array_c[%s] => %r' % (array_c.nrow, s)) # Close the file. fileh.close() The output for the previous script is something like:: array_c[0] => 'aa' array_c[1] => 'bbbb' array_c[2] => 'aaaaaa' array_c[3] => 'bbbbbbbb' array_c[4] => 'cccccccc' """ # Class identifier. 
_c_classid = 'EARRAY' _c_classId = previous_api_property('_c_classid') # Special methods # ~~~~~~~~~~~~~~~ def __init__(self, parentnode, name, atom=None, shape=None, title="", filters=None, expectedrows=None, chunkshape=None, byteorder=None, _log=True): # Specific of EArray if expectedrows is None: expectedrows = parentnode._v_file.params['EXPECTED_ROWS_EARRAY'] self._v_expectedrows = expectedrows """The expected number of rows to be stored in the array.""" # Call the parent (CArray) init code super(EArray, self).__init__(parentnode, name, atom, shape, title, filters, chunkshape, byteorder, _log) # Public and private methods # ~~~~~~~~~~~~~~~~~~~~~~~~~~ def _g_create(self): """Create a new array in file (specific part).""" # Pre-conditions and extdim computation zerodims = numpy.sum(numpy.array(self.shape) == 0) if zerodims > 0: if zerodims == 1: self.extdim = list(self.shape).index(0) else: raise NotImplementedError( "Multiple enlargeable (0-)dimensions are not " "supported.") else: raise ValueError( "When creating EArrays, you need to set one of " "the dimensions of the Atom instance to zero.") # Finish the common part of the creation process return self._g_create_common(self._v_expectedrows) def _check_shape_append(self, nparr): "Test that nparr shape is consistent with underlying EArray." # The arrays conforms self expandibility? myrank = len(self.shape) narank = len(nparr.shape) - len(self.atom.shape) if myrank != narank: raise ValueError(("the ranks of the appended object (%d) and the " "``%s`` EArray (%d) differ") % (narank, self._v_pathname, myrank)) for i in range(myrank): if i != self.extdim and self.shape[i] != nparr.shape[i]: raise ValueError(("the shapes of the appended object and the " "``%s`` EArray differ in non-enlargeable " "dimension %d") % (self._v_pathname, i)) _checkShapeAppend = previous_api(_check_shape_append) def append(self, sequence): """Add a sequence of data to the end of the dataset. The sequence must have the same type as the array; otherwise a TypeError is raised. In the same way, the dimensions of the sequence must conform to the shape of the array, that is, all dimensions must match, with the exception of the enlargeable dimension, which can be of any length (even 0!). If the shape of the sequence is invalid, a ValueError is raised. 
""" self._g_check_open() self._v_file._check_writable() # Convert the sequence into a NumPy object nparr = convert_to_np_atom2(sequence, self.atom) # Check if it has a consistent shape with underlying EArray self._check_shape_append(nparr) # If the size of the nparr is zero, don't do anything else if nparr.size > 0: self._append(nparr) def _g_copy_with_stats(self, group, name, start, stop, step, title, filters, chunkshape, _log, **kwargs): """Private part of Leaf.copy() for each kind of leaf.""" (start, stop, step) = self._process_range_read(start, stop, step) # Build the new EArray object maindim = self.maindim shape = list(self.shape) shape[maindim] = 0 # The number of final rows nrows = len(xrange(0, stop - start, step)) # Build the new EArray object object = EArray( group, name, atom=self.atom, shape=shape, title=title, filters=filters, expectedrows=nrows, chunkshape=chunkshape, _log=_log) # Now, fill the new earray with values from source nrowsinbuf = self.nrowsinbuf # The slices parameter for self.__getitem__ slices = [slice(0, dim, 1) for dim in self.shape] # This is a hack to prevent doing unnecessary conversions # when copying buffers self._v_convert = False # Start the copy itself for start2 in xrange(start, stop, step * nrowsinbuf): # Save the records on disk stop2 = start2 + step * nrowsinbuf if stop2 > stop: stop2 = stop # Set the proper slice in the extensible dimension slices[maindim] = slice(start2, stop2, step) object._append(self.__getitem__(tuple(slices))) # Active the conversion again (default) self._v_convert = True nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.itemsize return (object, nbytes) _g_copyWithStats = previous_api(_g_copy_with_stats)
if keyword.iskeyword(name): warnings.warn("object name is a Python keyword: %r; %s" % (name, warnInfo), NaturalNameWarning) return # Still, names starting with reserved prefixes are not allowed. if _reserved_id_re.match(name): raise ValueError("object name starts with a reserved prefix: %r; " "it matches the pattern ``%s``" % (name, _reserved_id_re.pattern)) # ``__members__`` is the only exception to that rule. if name == '__members__': raise ValueError("``__members__`` is not allowed as an object name") checkNameValidity = previous_api(check_name_validity) def join_path(parentpath, name): """Join a *canonical* `parentpath` with a *non-empty* `name`. .. versionchanged:: 3.0 The *parentPath* parameter has been renamed into *parentpath*. >>> join_path('/', 'foo') '/foo' >>> join_path('/foo', 'bar') '/foo/bar' >>> join_path('/foo', '/foo2/bar') '/foo/foo2/bar' >>> join_path('/foo', '/')
nrowscopied = SizeType(0) nbytes = 0 if not hasattr(self.atom, 'size'): # it is a pseudo-atom atomsize = self.atom.base.size else: atomsize = self.atom.size for start2 in xrange(start, stop, step * nrowsinbuf): # Save the records on disk stop2 = start2 + step * nrowsinbuf if stop2 > stop: stop2 = stop nparr = self._read_array(start=start2, stop=stop2, step=step)[0] nobjects = nparr.shape[0] object._append(nparr, nobjects) nbytes += nobjects * atomsize nrowscopied += 1 object.nrows = nrowscopied return (object, nbytes) _g_copyWithStats = previous_api(_g_copy_with_stats) def __repr__(self): """This provides more metainfo in addition to standard __str__""" return """%s atom = %r byteorder = %r nrows = %s flavor = %r""" % (self, self.atom, self.byteorder, self.nrows, self.flavor)
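
# The copy loop above distinguishes pseudo-atoms (which only expose
# ``atom.base``) from regular atoms when accounting the copied bytes.  Below
# is a small illustrative sketch of both kinds of variable-length array; the
# file and node names are assumptions made for the example only.
def _example_vlarray_atoms():
    import numpy as np
    import tables

    fileh = tables.open_file('vlarray_demo.h5', mode='w')
    try:
        # Regular atom: rows are variable-length int32 sequences.
        vl1 = fileh.create_vlarray(fileh.root, 'ints', tables.Int32Atom())
        vl1.append(np.arange(4, dtype='int32'))
        # Pseudo-atom: rows are pickled Python objects; its per-item size
        # lives in ``atom.base.size``, as handled in the copy loop above.
        vl2 = fileh.create_vlarray(fileh.root, 'objs', tables.ObjectAtom())
        vl2.append({'key': [1, 2, 3]})
    finally:
        fileh.close()
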
class CArray(Array): """This class represents homogeneous datasets in an HDF5 file. The difference between a CArray and a normal Array (see :ref:`ArrayClassDescr`), from which it inherits, is that a CArray has a chunked layout and, as a consequence, it supports compression. You can use datasets of this class to easily save or load arrays to or from disk, with compression support included. CArray includes all the instance variables and methods of Array. Only those with different behavior are mentioned here. Parameters ---------- parentnode The parent :class:`Group` object. .. versionchanged:: 3.0 Renamed from *parentNode* to *parentnode*. name : str The name of this node in its parent group. atom An `Atom` instance representing the *type* and *shape* of the atomic objects to be saved. shape The shape of the new array. title A description for this node (it sets the ``TITLE`` HDF5 attribute on disk). filters An instance of the `Filters` class that provides information about the desired I/O filters to be applied during the life of this object. chunkshape The shape of the data chunk to be read or written in a single HDF5 I/O operation. Filters are applied to those chunks of data. The dimensionality of `chunkshape` must be the same as that of `shape`. If ``None``, a sensible value is calculated (which is recommended). byteorder The byteorder of the data *on disk*, specified as 'little' or 'big'. If this is not specified, the byteorder is that of the platform. Examples -------- See below a small example of the use of the `CArray` class. The code is available in ``examples/carray1.py``:: import numpy import tables fileName = 'carray1.h5' shape = (200, 300) atom = tables.UInt8Atom() filters = tables.Filters(complevel=5, complib='zlib') h5f = tables.open_file(fileName, 'w') ca = h5f.create_carray(h5f.root, 'carray', atom, shape, filters=filters) # Fill a hyperslab in ``ca``. ca[10:60, 20:70] = numpy.ones((50, 50)) h5f.close() # Re-open a read another hyperslab h5f = tables.open_file(fileName) print(h5f) print(h5f.root.carray[8:12, 18:22]) h5f.close() The output for the previous script is something like:: carray1.h5 (File) '' Last modif.: 'Thu Apr 12 10:15:38 2007' Object Tree: / (RootGroup) '' /carray (CArray(200, 300), shuffle, zlib(5)) '' [[0 0 0 0] [0 0 0 0] [0 0 1 1] [0 0 1 1]] """ # Class identifier. _c_classid = 'CARRAY' _c_classId = previous_api_property('_c_classid') # Properties # ~~~~~~~~~~ # Special methods # ~~~~~~~~~~~~~~~ def __init__(self, parentnode, name, atom=None, shape=None, title="", filters=None, chunkshape=None, byteorder=None, _log=True): self.atom = atom """An `Atom` instance representing the shape, type of the atomic objects to be saved. """ self.shape = None """The shape of the stored array.""" self.extdim = -1 # `CArray` objects are not enlargeable by default """The index of the enlargeable dimension.""" # Other private attributes self._v_version = None """The object version of this array.""" self._v_new = new = atom is not None """Is this the first time the node has been created?""" self._v_new_title = title """New title for this node.""" self._v_convert = True """Whether the ``Array`` object must be converted or not.""" self._v_chunkshape = chunkshape """Private storage for the `chunkshape` property of the leaf.""" # Miscellaneous iteration rubbish. 
self._start = None """Starting row for the current iteration.""" self._stop = None """Stopping row for the current iteration.""" self._step = None """Step size for the current iteration.""" self._nrowsread = None """Number of rows read up to the current state of iteration.""" self._startb = None """Starting row for current buffer.""" self._stopb = None """Stopping row for current buffer. """ self._row = None """Current row in iterators (sentinel).""" self._init = False """Whether we are in the middle of an iteration or not (sentinel).""" self.listarr = None """Current buffer in iterators.""" if new: if not isinstance(atom, Atom): raise ValueError("atom parameter should be an instance of " "tables.Atom and you passed a %s." % type(atom)) if shape is None: raise ValueError("you must specify a non-empty shape") try: shape = tuple(shape) except TypeError: raise TypeError("`shape` parameter must be a sequence " "and you passed a %s" % type(shape)) self.shape = tuple(SizeType(s) for s in shape) if chunkshape is not None: try: chunkshape = tuple(chunkshape) except TypeError: raise TypeError( "`chunkshape` parameter must be a sequence " "and you passed a %s" % type(chunkshape)) if len(shape) != len(chunkshape): raise ValueError("the shape (%s) and chunkshape (%s) " "ranks must be equal." % (shape, chunkshape)) elif min(chunkshape) < 1: raise ValueError("chunkshape parameter cannot have " "zero-dimensions.") self._v_chunkshape = tuple(SizeType(s) for s in chunkshape) # The `Array` class is not abstract enough! :( super(Array, self).__init__(parentnode, name, new, filters, byteorder, _log) def _g_create(self): """Create a new array in file (specific part).""" if min(self.shape) < 1: raise ValueError("shape parameter cannot have zero-dimensions.") # Finish the common part of creation process return self._g_create_common(self.nrows) def _g_create_common(self, expectedrows): """Create a new array in file (common part).""" self._v_version = obversion if self._v_chunkshape is None: # Compute the optimal chunk size self._v_chunkshape = self._calc_chunkshape(expectedrows, self.rowsize, self.atom.size) # Compute the optimal nrowsinbuf self.nrowsinbuf = self._calc_nrowsinbuf() # Correct the byteorder if needed if self.byteorder is None: self.byteorder = correct_byteorder(self.atom.type, sys.byteorder) try: # ``self._v_objectid`` needs to be set because would be # needed for setting attributes in some descendants later # on self._v_objectid = self._create_carray(self._v_new_title) except: # XXX # Problems creating the Array on disk. Close node and re-raise. 
self.close(flush=0) raise return self._v_objectid def _g_copy_with_stats(self, group, name, start, stop, step, title, filters, chunkshape, _log, **kwargs): """Private part of Leaf.copy() for each kind of leaf.""" (start, stop, step) = self._process_range_read(start, stop, step) maindim = self.maindim shape = list(self.shape) shape[maindim] = len(xrange(0, stop - start, step)) # Now, fill the new carray with values from source nrowsinbuf = self.nrowsinbuf # The slices parameter for self.__getitem__ slices = [slice(0, dim, 1) for dim in self.shape] # This is a hack to prevent doing unnecessary conversions # when copying buffers self._v_convert = False # Build the new CArray object object = CArray(group, name, atom=self.atom, shape=shape, title=title, filters=filters, chunkshape=chunkshape, _log=_log) # Start the copy itself for start2 in xrange(start, stop, step * nrowsinbuf): # Save the records on disk stop2 = start2 + step * nrowsinbuf if stop2 > stop: stop2 = stop # Set the proper slice in the main dimension slices[maindim] = slice(start2, stop2, step) start3 = (start2 - start) // step stop3 = start3 + nrowsinbuf if stop3 > shape[maindim]: stop3 = shape[maindim] # The next line should be generalised if, in the future, # maindim is designed to be different from 0 in CArrays. # See ticket #199. object[start3:stop3] = self.__getitem__(tuple(slices)) # Activate the conversion again (default) self._v_convert = True nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size return (object, nbytes) _g_copyWithStats = previous_api(_g_copy_with_stats)
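
# A short sketch of the chunkshape validation performed in CArray.__init__
# above; 'carray_demo.h5' is an illustrative file name.
def _example_carray_chunkshape():
    import numpy as np
    import tables

    fileh = tables.open_file('carray_demo.h5', mode='w')
    try:
        atom = tables.Float64Atom()
        # chunkshape must have the same rank as shape and no zero entries;
        # passing None instead lets PyTables compute a sensible value.
        ca = fileh.create_carray(fileh.root, 'ca', atom, shape=(100, 100),
                                 chunkshape=(10, 100))
        ca[0:10, :] = np.random.rand(10, 100)
    finally:
        fileh.close()
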
import os import tables from tables import linkextension from tables.node import Node from tables.utils import lazyattr from tables.attributeset import AttributeSet import tables.file from tables._past import previous_api, previous_api_property def _g_get_link_class(parent_id, name): """Guess the link class.""" return linkextension._get_link_class(parent_id, name) _g_getLinkClass = previous_api(_g_get_link_class) class Link(Node): """Abstract base class for all PyTables links. A link is a node that refers to another node. The Link class inherits from Node class and the links that inherits from Link are SoftLink and ExternalLink. There is not a HardLink subclass because hard links behave like a regular Group or Leaf. Contrarily to other nodes, links cannot have HDF5 attributes. This is an HDF5 library limitation that might be solved in future releases. See :ref:`LinksTutorial` for a small tutorial on how to work with links. .. rubric:: Link attributes
class Leaf(Node): """Abstract base class for all PyTables leaves. A leaf is a node (see the Node class in :class:`Node`) which hangs from a group (see the Group class in :class:`Group`) but, unlike a group, it can not have any further children below it (i.e. it is an end node). This definition includes all nodes which contain actual data (datasets handled by the Table - see :ref:`TableClassDescr`, Array - see :ref:`ArrayClassDescr`, CArray - see :ref:`CArrayClassDescr`, EArray - see :ref:`EArrayClassDescr`, and VLArray - see :ref:`VLArrayClassDescr` classes) and unsupported nodes (the UnImplemented class - :ref:`UnImplementedClassDescr`) these classes do in fact inherit from Leaf. .. rubric:: Leaf attributes These instance variables are provided in addition to those in Node (see :ref:`NodeClassDescr`): .. attribute:: byteorder The byte ordering of the leaf data *on disk*. It will be either ``little`` or ``big``. .. attribute:: dtype The NumPy dtype that most closely matches this leaf type. .. attribute:: extdim The index of the enlargeable dimension (-1 if none). .. attribute:: nrows The length of the main dimension of the leaf data. .. attribute:: nrowsinbuf The number of rows that fit in internal input buffers. You can change this to fine-tune the speed or memory requirements of your application. .. attribute:: shape The shape of data in the leaf. """ # Properties # ~~~~~~~~~~ # Node property aliases # ````````````````````` # These are a little hard to override, but so are properties. attrs = Node._v_attrs """The associated AttributeSet instance - see :ref:`AttributeSetClassDescr` (This is an easier-to-write alias of :attr:`Node._v_attrs`.""" title = Node._v_title """A description for this node (This is an easier-to-write alias of :attr:`Node._v_title`).""" # Read-only node property aliases # ``````````````````````````````` name = property( lambda self: self._v_name, None, None, """The name of this node in its parent group (This is an easier-to-write alias of :attr:`Node._v_name`).""") chunkshape = property( lambda self: getattr(self, '_v_chunkshape', None), None, None, """The HDF5 chunk size for chunked leaves (a tuple). This is read-only because you cannot change the chunk size of a leaf once it has been created. """) object_id = property( lambda self: self._v_objectid, None, None, """A node identifier, which may change from run to run. (This is an easier-to-write alias of :attr:`Node._v_objectid`). .. versionchanged:: 3.0 The *objectID* property has been renamed into *object_id*. """) objectID = previous_api(object_id) ndim = property( lambda self: len(self.shape), None, None, """The number of dimensions of the leaf data. .. versionadded: 2.4""") # Lazy read-only attributes # ````````````````````````` @lazyattr def filters(self): """Filter properties for this leaf. See Also -------- Filters """ return Filters._from_leaf(self) # Other properties # ```````````````` def _getmaindim(self): if self.extdim < 0: return 0 # choose the first dimension return self.extdim maindim = property( _getmaindim, None, None, """The dimension along which iterators work. Its value is 0 (i.e. the first dimension) when the dataset is not extendable, and self.extdim (where available) for extendable ones. 
""") def _setflavor(self, flavor): self._v_file._check_writable() check_flavor(flavor) self._v_attrs.FLAVOR = self._flavor = flavor # logs the change def _delflavor(self): del self._v_attrs.FLAVOR self._flavor = internal_flavor flavor = property( lambda self: self._flavor, _setflavor, _delflavor, """The type of data object read from this leaf. It can be any of 'numpy' or 'python'. You can (and are encouraged to) use this property to get, set and delete the FLAVOR HDF5 attribute of the leaf. When the leaf has no such attribute, the default flavor is used.. """) size_on_disk = property( lambda self: self._get_storage_size(), None, None, """ The size of this leaf's data in bytes as it is stored on disk. If the data is compressed, this shows the compressed size. In the case of uncompressed, chunked data, this may be slightly larger than the amount of data, due to partially filled chunks. """) # Special methods # ~~~~~~~~~~~~~~~ def __init__(self, parentnode, name, new=False, filters=None, byteorder=None, _log=True): self._v_new = new """Is this the first time the node has been created?""" self.nrowsinbuf = None """ The number of rows that fits in internal input buffers. You can change this to fine-tune the speed or memory requirements of your application. """ self._flavor = None """Private storage for the `flavor` property.""" if new: # Get filter properties from parent group if not given. if filters is None: filters = parentnode._v_filters self.__dict__['filters'] = filters # bypass the property if byteorder not in (None, 'little', 'big'): raise ValueError( "the byteorder can only take 'little' or 'big' values " "and you passed: %s" % byteorder) self.byteorder = byteorder """The byte ordering of the leaf data *on disk*.""" # Existing filters need not be read since `filters` # is a lazy property that automatically handles their loading. super(Leaf, self).__init__(parentnode, name, _log) def __len__(self): """Return the length of the main dimension of the leaf data. Please note that this may raise an OverflowError on 32-bit platforms for datasets having more than 2**31-1 rows. This is a limitation of Python that you can work around by using the nrows or shape attributes. """ return self.nrows def __str__(self): """The string representation for this object is its pathname in the HDF5 object tree plus some additional metainfo.""" # Get this class name classname = self.__class__.__name__ # The title title = self._v_title # The filters filters = "" if self.filters.fletcher32: filters += ", fletcher32" if self.filters.complevel: if self.filters.shuffle: filters += ", shuffle" filters += ", %s(%s)" % (self.filters.complib, self.filters.complevel) return "%s (%s%s%s) %r" % \ (self._v_pathname, classname, self.shape, filters, title) # Private methods # ~~~~~~~~~~~~~~~ def _g_post_init_hook(self): """Code to be run after node creation and before creation logging. This method gets or sets the flavor of the leaf. 
""" super(Leaf, self)._g_post_init_hook() if self._v_new: # set flavor of new node if self._flavor is None: self._flavor = internal_flavor else: # flavor set at creation time, do not log if self._v_file.params['PYTABLES_SYS_ATTRS']: self._v_attrs._g__setattr('FLAVOR', self._flavor) else: # get flavor of existing node (if any) if self._v_file.params['PYTABLES_SYS_ATTRS']: flavor = getattr(self._v_attrs, 'FLAVOR', internal_flavor) self._flavor = flavor_alias_map.get(flavor, flavor) else: self._flavor = internal_flavor _g_postInitHook = previous_api(_g_post_init_hook) def _calc_chunkshape(self, expectedrows, rowsize, itemsize): """Calculate the shape for the HDF5 chunk.""" # In case of a scalar shape, return the unit chunksize if self.shape == (): return (SizeType(1), ) # Compute the chunksize MB = 1024 * 1024 expected_mb = (expectedrows * rowsize) // MB chunksize = calc_chunksize(expected_mb) maindim = self.maindim # Compute the chunknitems chunknitems = chunksize // itemsize # Safeguard against itemsizes being extremely large if chunknitems == 0: chunknitems = 1 chunkshape = list(self.shape) # Check whether trimming the main dimension is enough chunkshape[maindim] = 1 newchunknitems = numpy.prod(chunkshape, dtype=SizeType) if newchunknitems <= chunknitems: chunkshape[maindim] = chunknitems // newchunknitems else: # No, so start trimming other dimensions as well for j in xrange(len(chunkshape)): # Check whether trimming this dimension is enough chunkshape[j] = 1 newchunknitems = numpy.prod(chunkshape, dtype=SizeType) if newchunknitems <= chunknitems: chunkshape[j] = chunknitems // newchunknitems break else: # Ops, we ran out of the loop without a break # Set the last dimension to chunknitems chunkshape[-1] = chunknitems return tuple(SizeType(s) for s in chunkshape) def _calc_nrowsinbuf(self): """Calculate the number of rows that fits on a PyTables buffer.""" params = self._v_file.params # Compute the nrowsinbuf rowsize = self.rowsize buffersize = params['IO_BUFFER_SIZE'] if rowsize != 0: nrowsinbuf = buffersize // rowsize else: nrowsinbuf = 1 # Safeguard against row sizes being extremely large if nrowsinbuf == 0: nrowsinbuf = 1 # If rowsize is too large, issue a Performance warning maxrowsize = params['BUFFER_TIMES'] * buffersize if rowsize > maxrowsize: warnings.warn( """\ The Leaf ``%s`` is exceeding the maximum recommended rowsize (%d bytes); be ready to see PyTables asking for *lots* of memory and possibly slow I/O. You may want to reduce the rowsize by trimming the value of dimensions that are orthogonal (and preferably close) to the *main* dimension of this leave. 
Alternatively, in case you have specified a very small/large chunksize, you may want to increase/decrease it.""" % (self._v_pathname, maxrowsize), PerformanceWarning) return nrowsinbuf # This method is appropriate for calls to __getitem__ methods def _process_range(self, start, stop, step, dim=None, warn_negstep=True): if dim is None: nrows = self.nrows # self.shape[self.maindim] else: nrows = self.shape[dim] if warn_negstep and step and step < 0: raise ValueError("slice step cannot be negative") #if start is not None: start = long(start) #if stop is not None: stop = long(stop) #if step is not None: step = long(step) return slice(start, stop, step).indices(long(nrows)) _processRange = previous_api(_process_range) # This method is appropriate for calls to read() methods def _process_range_read(self, start, stop, step, warn_negstep=True): nrows = self.nrows if start is not None and stop is None and step is None: # Protection against start greater than available records # nrows == 0 is a special case for empty objects if nrows > 0 and start >= nrows: raise IndexError("start of range (%s) is greater than " "number of rows (%s)" % (start, nrows)) step = 1 if start == -1: # corner case stop = nrows else: stop = start + 1 # Finally, get the correct values (over the main dimension) start, stop, step = self._process_range(start, stop, step, warn_negstep=warn_negstep) return (start, stop, step) _processRangeRead = previous_api(_process_range_read) def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs): # Compute default arguments. start = kwargs.pop('start', None) stop = kwargs.pop('stop', None) step = kwargs.pop('step', None) title = kwargs.pop('title', self._v_title) filters = kwargs.pop('filters', self.filters) chunkshape = kwargs.pop('chunkshape', self.chunkshape) copyuserattrs = kwargs.pop('copyuserattrs', True) stats = kwargs.pop('stats', None) if chunkshape == 'keep': chunkshape = self.chunkshape # Keep the original chunkshape elif chunkshape == 'auto': chunkshape = None # Will recompute chunkshape # Fix arguments with explicit None values for backwards compatibility. if title is None: title = self._v_title if filters is None: filters = self.filters # Create a copy of the object. (new_node, bytes) = self._g_copy_with_stats(newparent, newname, start, stop, step, title, filters, chunkshape, _log, **kwargs) # Copy user attributes if requested (or the flavor at least). if copyuserattrs: self._v_attrs._g_copy(new_node._v_attrs, copyclass=True) elif 'FLAVOR' in self._v_attrs: if self._v_file.params['PYTABLES_SYS_ATTRS']: new_node._v_attrs._g__setattr('FLAVOR', self._flavor) new_node._flavor = self._flavor # update cached value # Update statistics if needed. if stats is not None: stats['leaves'] += 1 stats['bytes'] += bytes return new_node def _g_fix_byteorder_data(self, data, dbyteorder): "Fix the byteorder of data passed in constructors." dbyteorder = byteorders[dbyteorder] # If self.byteorder has not been passed as an argument of # the constructor, then set it to the same value of data. if self.byteorder is None: self.byteorder = dbyteorder # Do an additional in-place byteswap of data if the in-memory # byteorder doesn't match that of the on-disk. This is the only # place that we have to do the conversion manually. In all the # other cases, it will be HDF5 the responsible of doing the # byteswap properly. 
if dbyteorder in ['little', 'big']: if dbyteorder != self.byteorder: # if data is not writeable, do a copy first if not data.flags.writeable: data = data.copy() data.byteswap(True) else: # Fix the byteorder again, no matter which byteorder have # specified the user in the constructor. self.byteorder = "irrelevant" return data def _point_selection(self, key): """Perform a point-wise selection. `key` can be any of the following items: * A boolean array with the same shape than self. Those positions with True values will signal the coordinates to be returned. * A numpy array (or list or tuple) with the point coordinates. This has to be a two-dimensional array of size len(self.shape) by num_elements containing a list of of zero-based values specifying the coordinates in the dataset of the selected elements. The order of the element coordinates in the array specifies the order in which the array elements are iterated through when I/O is performed. Duplicate coordinate locations are not checked for. Return the coordinates array. If this is not possible, raise a `TypeError` so that the next selection method can be tried out. This is useful for whatever `Leaf` instance implementing a point-wise selection. """ if type(key) in (list, tuple): if isinstance(key, tuple) and len(key) > len(self.shape): raise IndexError("Invalid index or slice: %r" % (key, )) # Try to convert key to a numpy array. If not possible, # a TypeError will be issued (to be catched later on). try: key = numpy.array(key) except ValueError: raise TypeError("Invalid index or slice: %r" % (key, )) elif not isinstance(key, numpy.ndarray): raise TypeError("Invalid index or slice: %r" % (key, )) # Protection against empty keys if len(key) == 0: return numpy.array([], dtype="i8") if key.dtype.kind == 'b': if not key.shape == self.shape: raise IndexError( "Boolean indexing array has incompatible shape") # Get the True coordinates (64-bit indices!) coords = numpy.asarray(key.nonzero(), dtype='i8') coords = numpy.transpose(coords) elif key.dtype.kind == 'i' or key.dtype.kind == 'u': if len(key.shape) > 2: raise IndexError( "Coordinate indexing array has incompatible shape") elif len(key.shape) == 2: if key.shape[0] != len(self.shape): raise IndexError( "Coordinate indexing array has incompatible shape") coords = numpy.asarray(key, dtype="i8") coords = numpy.transpose(coords) else: # For 1-dimensional datasets coords = numpy.asarray(key, dtype="i8") # handle negative indices idx = coords < 0 coords[idx] = (coords + self.shape)[idx] # bounds check if numpy.any(coords < 0) or numpy.any(coords >= self.shape): raise IndexError("Index out of bounds") else: raise TypeError("Only integer coordinates allowed.") # We absolutely need a contiguous array if not coords.flags.contiguous: coords = coords.copy() return coords _pointSelection = previous_api(_point_selection) # Public methods # ~~~~~~~~~~~~~~ # Tree manipulation # ````````````````` def remove(self): """Remove this node from the hierarchy. This method has the behavior described in :meth:`Node._f_remove`. Please note that there is no recursive flag since leaves do not have child nodes. """ self._f_remove(False) def rename(self, newname): """Rename this node in place. This method has the behavior described in :meth:`Node._f_rename()`. """ self._f_rename(newname) def move(self, newparent=None, newname=None, overwrite=False, createparents=False): """Move or rename this node. 
This method has the behavior described in :meth:`Node._f_move` """ self._f_move(newparent, newname, overwrite, createparents) def copy(self, newparent=None, newname=None, overwrite=False, createparents=False, **kwargs): """Copy this node and return the new one. This method has the behavior described in :meth:`Node._f_copy`. Please note that there is no recursive flag since leaves do not have child nodes. .. warning:: Note that unknown parameters passed to this method will be ignored, so may want to double check the spelling of these (i.e. if you write them incorrectly, they will most probably be ignored). Parameters ---------- title The new title for the destination. If omitted or None, the original title is used. filters : Filters Specifying this parameter overrides the original filter properties in the source node. If specified, it must be an instance of the Filters class (see :ref:`FiltersClassDescr`). The default is to copy the filter properties from the source node. copyuserattrs You can prevent the user attributes from being copied by setting this parameter to False. The default is to copy them. start, stop, step : int Specify the range of rows to be copied; the default is to copy all the rows. stats This argument may be used to collect statistics on the copy process. When used, it should be a dictionary with keys 'groups', 'leaves' and 'bytes' having a numeric value. Their values will be incremented to reflect the number of groups, leaves and bytes, respectively, that have been copied during the operation. chunkshape The chunkshape of the new leaf. It supports a couple of special values. A value of keep means that the chunkshape will be the same than original leaf (this is the default). A value of auto means that a new shape will be computed automatically in order to ensure best performance when accessing the dataset through the main dimension. Any other value should be an integer or a tuple matching the dimensions of the leaf. """ return self._f_copy(newparent, newname, overwrite, createparents, **kwargs) def truncate(self, size): """Truncate the main dimension to be size rows. If the main dimension previously was larger than this size, the extra data is lost. If the main dimension previously was shorter, it is extended, and the extended part is filled with the default values. The truncation operation can only be applied to *enlargeable* datasets, else a TypeError will be raised. """ # A non-enlargeable arrays (Array, CArray) cannot be truncated if self.extdim < 0: raise TypeError("non-enlargeable datasets cannot be truncated") self._g_truncate(size) def isvisible(self): """Is this node visible? This method has the behavior described in :meth:`Node._f_isvisible()`. """ return self._f_isvisible() isVisible = previous_api(isvisible) # Attribute handling # `````````````````` def get_attr(self, name): """Get a PyTables attribute from this node. This method has the behavior described in :meth:`Node._f_getattr`. """ return self._f_getattr(name) getAttr = previous_api(get_attr) def set_attr(self, name, value): """Set a PyTables attribute for this node. This method has the behavior described in :meth:`Node._f_setattr()`. """ self._f_setattr(name, value) setAttr = previous_api(set_attr) def del_attr(self, name): """Delete a PyTables attribute from this node. This method has the behavior described in :meth:`Node_f_delAttr`. """ self._f_delattr(name) delAttr = previous_api(del_attr) # Data handling # ````````````` def flush(self): """Flush pending data to disk. 
        Saves whatever remaining buffered data to disk. It also releases
        I/O buffers, so if you are filling many datasets in the same
        PyTables session, please call flush() frequently to help PyTables
        keep memory requirements low.

        """

        self._g_flush()

    def _f_close(self, flush=True):
        """Close this node in the tree.

        This method has the behavior described in :meth:`Node._f_close`.
        Besides that, the optional argument flush tells whether to flush
        pending data to disk or not before closing.

        """

        if not self._v_isopen:
            return  # the node is already closed or not initialized

        # Only do a flush in case the leaf has an IO buffer.  The
        # internal buffers of HDF5 will be flushed afterwards during the
        # self._g_close() call.  Avoiding an unnecessary flush()
        # operation accelerates the closing for the unbuffered leaves.
        if flush and hasattr(self, "_v_iobuf"):
            self.flush()

        # Close the dataset and release resources
        self._g_close()

        # Close myself as a node.
        super(Leaf, self)._f_close()

    def close(self, flush=True):
        """Close this node in the tree.

        This method is completely equivalent to :meth:`Leaf._f_close`.

        """

        self._f_close(flush)
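# Example (added for illustration; not part of the original module): a
# minimal sketch of how the Leaf methods above (flush, copy, truncate,
# close) are typically combined.  The file name and sample data are
# invented, and the snippet assumes a standard PyTables 3.x install.
#
#   import numpy
#   import tables
#
#   with tables.open_file("leaf_demo.h5", mode="w") as h5file:
#       # Create an enlargeable array and append some rows.
#       earray = h5file.create_earray(h5file.root, "data",
#                                     atom=tables.Float64Atom(), shape=(0,))
#       earray.append(numpy.arange(1000, dtype="float64"))
#       earray.flush()                  # write buffered rows to disk
#
#       # Copy the leaf, letting PyTables choose a new chunkshape.
#       earray.copy(h5file.root, "data_copy", chunkshape="auto")
#
#       # Truncate the enlargeable dimension down to 10 rows.
#       earray.truncate(10)
#       print(earray.nrows)             # -> 10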
[path for path in alivenodes], lambda path: alivenodes[path]) # Next, revive the dead nodes, close and delete them # so they are not placed in the limbo again. # These two steps ensure tables are closed *before* their indices. closenodes(prefix, [path for path in deadnodes if '/_i_' not in path], # not indices lambda path: revivenode(path)) # Close everything else (i.e. indices) closenodes(prefix, [path for path in deadnodes], lambda path: revivenode(path)) _g_closeDescendents = previous_api(_g_close_descendents) def _g_close(self): """Close this (open) group.""" # hdf5extension operations: # Close HDF5 group. self._g_close_group() # Close myself as a node. super(Group, self)._f_close() def _f_close(self): """Close this group and all its descendents. This method has the behavior described in :meth:`Node._f_close`.
    # Now, create the new group.  This works even if dstgroup == '/'
    for nodeName in dstgroup.split('/'):
        if nodeName == '':
            continue
        # First, check whether the intermediate group already exists.
        try:
            group2 = dstfileh.get_node(group, nodeName)
        except NoSuchNodeError:
            # The group does not exist. Create it.
            group2 = dstfileh.create_group(group, nodeName,
                                           title=title,
                                           filters=filters)
        group = group2
    return group

newdstGroup = previous_api(newdst_group)


def recreate_indexes(table, dstfileh, dsttable):
    listoldindexes = table._listoldindexes
    if listoldindexes != []:
        if not regoldindexes:
            if verbose:
                print("[I]Not regenerating indexes for table: '%s:%s'" %
                      (dstfileh.filename, dsttable._v_pathname))
            return
        # Now, recreate the indexed columns
        if verbose:
            print("[I]Regenerating indexes for table: '%s:%s'" %
                  (dstfileh.filename, dsttable._v_pathname))
        for colname in listoldindexes:
class RootGroup(Group): _v_objectId = previous_api_property('_v_objectid') def __init__(self, ptfile, name, title, new, filters): mydict = self.__dict__ # Set group attributes. self._v_version = obversion self._v_new = new if new: self._v_new_title = title self._v_new_filters = filters else: self._v_new_title = None self._v_new_filters = None # Set node attributes. self._v_file = ptfile self._v_isopen = True # root is always open self._v_pathname = '/' self._v_name = '/' self._v_depth = 0 self._v_max_group_width = ptfile.params['MAX_GROUP_WIDTH'] self._v__deleting = False self._v_objectid = None # later # Only the root node has the file as a parent. # Bypass __setattr__ to avoid the ``Node._v_parent`` property. mydict['_v_parent'] = ptfile ptfile._node_manager.register_node(self, '/') # hdf5extension operations (do before setting an AttributeSet): # Update node attributes. self._g_new(ptfile, name, init=True) # Open the node and get its object ID. self._v_objectid = self._g_open() # Set disk attributes and read children names. # # This *must* be postponed because this method needs the root node # to be created and bound to ``File.root``. # This is an exception to the rule, handled by ``File.__init()__``. # # self._g_post_init_hook() def _g_load_child(self, childname): """Load a child node from disk. The child node `childname` is loaded from disk and an adequate `Node` object is created and returned. If there is no such child, a `NoSuchNodeError` is raised. """ if self._v_file.root_uep != "/": childname = join_path(self._v_file.root_uep, childname) # Is the node a group or a leaf? node_type = self._g_check_has_child(childname) # Nodes that HDF5 report as H5G_UNKNOWN if node_type == 'Unknown': return Unknown(self, childname) # Guess the PyTables class suited to the node, # build a PyTables node and return it. if node_type == "Group": if self._v_file.params['PYTABLES_SYS_ATTRS']: ChildClass = self._g_get_child_group_class(childname) else: # Default is a Group class ChildClass = Group return ChildClass(self, childname, new=False) elif node_type == "Leaf": ChildClass = self._g_get_child_leaf_class(childname, warn=True) # Building a leaf may still fail because of unsupported types # and other causes. # return ChildClass(self, childname) # uncomment for debugging try: return ChildClass(self, childname) except Exception as exc: # XXX warnings.warn( "problems loading leaf ``%s``::\n\n" " %s\n\n" "The leaf will become an ``UnImplemented`` node." % (self._g_join(childname), exc)) # If not, associate an UnImplemented object to it return UnImplemented(self, childname) elif node_type == "SoftLink": return SoftLink(self, childname) elif node_type == "ExternalLink": return ExternalLink(self, childname) else: return UnImplemented(self, childname) _g_loadChild = previous_api(_g_load_child) def _f_rename(self, newname): raise NodeError("the root node can not be renamed") def _f_move(self, newparent=None, newname=None, createparents=False): raise NodeError("the root node can not be moved") def _f_remove(self, recursive=False): raise NodeError("the root node can not be removed")
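# Example (illustrative only; not part of the original module): as
# implemented above, the root group rejects any attempt to rename, move
# or remove it.  The file name below is invented.
#
#   import tables
#
#   with tables.open_file("root_demo.h5", mode="w") as h5file:
#       try:
#           h5file.root._f_rename("newroot")
#       except tables.NodeError as exc:
#           print(exc)    # "the root node can not be renamed"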
newdict = {"columns": {}, } if '__doc__' in classdict: newdict['__doc__'] = classdict['__doc__'] for b in bases: if "columns" in b.__dict__: newdict["columns"].update(b.__dict__["columns"]) for k in classdict: # if not (k.startswith('__') or k.startswith('_v_')): # We let pass _v_ variables to configure class behaviour if not (k.startswith('__')): newdict["columns"][k] = classdict[k] # Return a new class with the "columns" attribute filled return type.__new__(cls, classname, bases, newdict) metaIsDescription = previous_api(MetaIsDescription) class IsDescription(object, metaclass=MetaIsDescription): """Description of the structure of a table or nested column. This class is designed to be used as an easy, yet meaningful way to describe the structure of new Table (see :ref:`TableClassDescr`) datasets or nested columns through the definition of *derived classes*. In order to define such a class, you must declare it as descendant of IsDescription, with as many attributes as columns you want in your table. The name of each attribute will become the name of a column, and its value will hold a description of it. Ordinary columns can be described using instances of the Col class (see :ref:`ColClassDescr`). Nested columns can be described by using classes
class _ChildrenDict(tables.misc.proxydict.ProxyDict):
    def _get_value_from_container(self, container, key):
        return container._f_get_child(key)

    _getValueFromContainer = previous_api(_get_value_from_container)
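# Example (illustrative only; not part of the original module): the proxy
# dictionary above resolves values lazily through ``Group._f_get_child``,
# so ``group._v_children[name]`` always returns a live node object.  The
# file and node names are invented.
#
#   import tables
#
#   with tables.open_file("children_demo.h5", mode="w") as h5file:
#       group = h5file.create_group("/", "g1")
#       h5file.create_array(group, "x", [1, 2, 3])
#       print(list(group._v_children))      # -> ['x']
#       print(group._v_children["x"])       # the same node as group.x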
def new_node(h5file, **kwargs): """Creates a new file node object in the specified PyTables file object. Additional named arguments where and name must be passed to specify where the file node is to be created. Other named arguments such as title and filters may also be passed. The special named argument expectedsize, indicating an estimate of the file size in bytes, may also be passed. It returns the file node object. """ return RAFileNode(None, h5file, **kwargs) newNode = previous_api(new_node) def open_node(node, mode='r'): """Opens an existing file node. Returns a file node object from the existing specified PyTables node. If mode is not specified or it is 'r', the file can only be read, and the pointer is positioned at the beginning of the file. If mode is 'a+', the file can be read and appended, and the pointer is positioned at the end of the file. """ if mode == 'r': return ROFileNode(node)
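# Example (illustrative only; not part of the original module): a minimal
# sketch of the filenode workflow described in the docstrings above.  The
# file and node names are invented.
#
#   import tables
#   from tables.nodes import filenode
#
#   h5file = tables.open_file("fnode_demo.h5", mode="w")
#   fnode = filenode.new_node(h5file, where="/", name="fnode_test")
#   fnode.write(b"hello, filenode\n")
#   fnode.close()
#
#   fnode = filenode.open_node(h5file.root.fnode_test, mode="a+")
#   fnode.write(b"one more line\n")
#   fnode.close()
#   h5file.close()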
class Group(hdf5extension.Group, Node): """Basic PyTables grouping structure. Instances of this class are grouping structures containing *child* instances of zero or more groups or leaves, together with supporting metadata. Each group has exactly one *parent* group. Working with groups and leaves is similar in many ways to working with directories and files, respectively, in a Unix filesystem. As with Unix directories and files, objects in the object tree are often described by giving their full (or absolute) path names. This full path can be specified either as a string (like in '/group1/group2') or as a complete object path written in *natural naming* schema (like in file.root.group1.group2). A collateral effect of the *natural naming* schema is that the names of members in the Group class and its instances must be carefully chosen to avoid colliding with existing children node names. For this reason and to avoid polluting the children namespace all members in a Group start with some reserved prefix, like _f_ (for public methods), _g_ (for private ones), _v_ (for instance variables) or _c_ (for class variables). Any attempt to create a new child node whose name starts with one of these prefixes will raise a ValueError exception. Another effect of natural naming is that children named after Python keywords or having names not valid as Python identifiers (e.g. class, $a or 44) can not be accessed using the node.child syntax. You will be forced to use node._f_get_child(child) to access them (which is recommended for programmatic accesses). You will also need to use _f_get_child() to access an existing child node if you set a Python attribute in the Group with the same name as that node (you will get a NaturalNameWarning when doing this). Parameters ---------- parentnode The parent :class:`Group` object. .. versionchanged:: 3.0 Renamed from *parentNode* to *parentnode* name : str The name of this node in its parent group. title The title for this group new If this group is new or has to be read from disk filters : Filters A Filters instance Notes ----- The following documentation includes methods that are automatically called when a Group instance is accessed in a special way. For instance, this class defines the __setattr__, __getattr__, and __delattr__ methods, and they set, get and delete *ordinary Python attributes* as normally intended. In addition to that, __getattr__ allows getting *child nodes* by their name for the sake of easy interaction on the command line, as long as there is no Python attribute with the same name. Groups also allow the interactive completion (when using readline) of the names of child nodes. For instance:: # get a Python attribute nchild = group._v_nchildren # Add a Table child called 'table' under 'group'. h5file.create_table(group, 'table', myDescription) table = group.table # get the table child instance group.table = 'foo' # set a Python attribute # (PyTables warns you here about using the name of a child node.) foo = group.table # get a Python attribute del group.table # delete a Python attribute table = group.table # get the table child instance again .. rubric:: Group attributes The following instance variables are provided in addition to those in Node (see :ref:`NodeClassDescr`): .. attribute:: _v_children Dictionary with all nodes hanging from this group. .. attribute:: _v_groups Dictionary with all groups hanging from this group. .. attribute:: _v_hidden Dictionary with all hidden nodes hanging from this group. .. 
attribute:: _v_leaves Dictionary with all leaves hanging from this group. .. attribute:: _v_links Dictionary with all links hanging from this group. .. attribute:: _v_unknown Dictionary with all unknown nodes hanging from this group. """ # Class identifier. _c_classid = 'GROUP' _c_classId = previous_api_property('_c_classid') # Children containers that should be loaded only in a lazy way. # These are documented in the ``Group._g_add_children_names`` method. _c_lazy_children_attrs = ( '__members__', '_v_children', '_v_groups', '_v_leaves', '_v_links', '_v_unknown', '_v_hidden') # `_v_nchildren` is a direct read-only shorthand # for the number of *visible* children in a group. def _g_getnchildren(self): return len(self._v_children) _v_nchildren = property(_g_getnchildren, None, None, "The number of children hanging from this group.") # `_v_filters` is a direct read-write shorthand for the ``FILTERS`` # attribute with the default `Filters` instance as a default value. def _g_getfilters(self): filters = getattr(self._v_attrs, 'FILTERS', None) if filters is None: filters = Filters() return filters def _g_setfilters(self, value): if not isinstance(value, Filters): raise TypeError( "value is not an instance of `Filters`: %r" % (value,)) self._v_attrs.FILTERS = value def _g_delfilters(self): del self._v_attrs.FILTERS _v_filters = property( _g_getfilters, _g_setfilters, _g_delfilters, """Default filter properties for child nodes. You can (and are encouraged to) use this property to get, set and delete the FILTERS HDF5 attribute of the group, which stores a Filters instance (see :ref:`FiltersClassDescr`). When the group has no such attribute, a default Filters instance is used. """) _v_maxGroupWidth = previous_api_property('_v_max_group_width') def __init__(self, parentnode, name, title="", new=False, filters=None, _log=True): # Remember to assign these values in the root group constructor # if it does not use this one! # First, set attributes belonging to group objects. self._v_version = obversion """The object version of this group.""" self._v_new = new """Is this the first time the node has been created?""" self._v_new_title = title """New title for this node.""" self._v_new_filters = filters """New default filter properties for child nodes.""" self._v_max_group_width = parentnode._v_file.params['MAX_GROUP_WIDTH'] """Maximum number of children on each group before warning the user. .. versionchanged:: 3.0 The *_v_maxGroupWidth* attribute has been renamed into *_v_max_group_width*. """ # Finally, set up this object as a node. super(Group, self).__init__(parentnode, name, _log) def _g_post_init_hook(self): if self._v_new: if self._v_file.params['PYTABLES_SYS_ATTRS']: # Save some attributes for the new group on disk. set_attr = self._v_attrs._g__setattr # Set the title, class and version attributes. set_attr('TITLE', self._v_new_title) set_attr('CLASS', self._c_classid) set_attr('VERSION', self._v_version) # Set the default filter properties. newfilters = self._v_new_filters if newfilters is None: # If no filters have been passed in the constructor, # inherit them from the parent group, but only if they # have been inherited or explicitly set. 
newfilters = getattr( self._v_parent._v_attrs, 'FILTERS', None) if newfilters is not None: set_attr('FILTERS', newfilters) else: # If the file has PyTables format, get the VERSION attr if 'VERSION' in self._v_attrs._v_attrnamessys: self._v_version = self._v_attrs.VERSION else: self._v_version = "0.0 (unknown)" # We don't need to get more attributes from disk, # since the most important ones are defined as properties. _g_postInitHook = previous_api(_g_post_init_hook) def __del__(self): if (self._v_isopen and self._v_pathname in self._v_file._node_manager.registry and '_v_children' in self.__dict__): # The group is going to be killed. Rebuild weak references # (that Python cancelled just before calling this method) so # that they are still usable if the object is revived later. selfref = weakref.ref(self) self._v_children.containerref = selfref self._v_groups.containerref = selfref self._v_leaves.containerref = selfref self._v_links.containerref = selfref self._v_unknown.containerref = selfref self._v_hidden.containerref = selfref super(Group, self).__del__() def _g_get_child_group_class(self, childname): """Get the class of a not-yet-loaded group child. `childname` must be the name of a *group* child. """ childCID = self._g_get_gchild_attr(childname, 'CLASS') if childCID is not None and not isinstance(childCID, str): childCID = childCID.decode('utf-8') if childCID in class_id_dict: return class_id_dict[childCID] # look up group class else: return Group # default group class _g_getChildGroupClass = previous_api(_g_get_child_group_class) def _g_get_child_leaf_class(self, childname, warn=True): """Get the class of a not-yet-loaded leaf child. `childname` must be the name of a *leaf* child. If the child belongs to an unknown kind of leaf, or if its kind can not be guessed, `UnImplemented` will be returned and a warning will be issued if `warn` is true. """ if self._v_file.params['PYTABLES_SYS_ATTRS']: childCID = self._g_get_lchild_attr(childname, 'CLASS') if childCID is not None and not isinstance(childCID, str): childCID = childCID.decode('utf-8') else: childCID = None if childCID in class_id_dict: return class_id_dict[childCID] # look up leaf class else: # Unknown or no ``CLASS`` attribute, try a guess. childCID2 = utilsextension.which_class(self._v_objectid, childname) if childCID2 == 'UNSUPPORTED': if warn: if childCID is None: warnings.warn( "leaf ``%s`` is of an unsupported type; " "it will become an ``UnImplemented`` node" % self._g_join(childname)) else: warnings.warn( ("leaf ``%s`` has an unknown class ID ``%s``; " "it will become an ``UnImplemented`` node") % (self._g_join(childname), childCID)) return UnImplemented assert childCID2 in class_id_dict return class_id_dict[childCID2] # look up leaf class _g_getChildLeafClass = previous_api(_g_get_child_leaf_class) def _g_add_children_names(self): """Add children names to this group taking into account their visibility and kind.""" mydict = self.__dict__ # The names of the lazy attributes mydict['__members__'] = members = [] """The names of visible children nodes for readline-style completion. 
""" mydict['_v_children'] = children = _ChildrenDict(self) """The number of children hanging from this group.""" mydict['_v_groups'] = groups = _ChildrenDict(self) """Dictionary with all groups hanging from this group.""" mydict['_v_leaves'] = leaves = _ChildrenDict(self) """Dictionary with all leaves hanging from this group.""" mydict['_v_links'] = links = _ChildrenDict(self) """Dictionary with all links hanging from this group.""" mydict['_v_unknown'] = unknown = _ChildrenDict(self) """Dictionary with all unknown nodes hanging from this group.""" mydict['_v_hidden'] = hidden = _ChildrenDict(self) """Dictionary with all hidden nodes hanging from this group.""" # Get the names of *all* child groups and leaves. (group_names, leaf_names, link_names, unknown_names) = \ self._g_list_group(self._v_parent) # Separate groups into visible groups and hidden nodes, # and leaves into visible leaves and hidden nodes. for (childnames, childdict) in ((group_names, groups), (leaf_names, leaves), (link_names, links), (unknown_names, unknown)): for childname in childnames: # See whether the name implies that the node is hidden. # (Assigned values are entirely irrelevant.) if isvisiblename(childname): # Visible node. members.insert(0, childname) children[childname] = None childdict[childname] = None else: # Hidden node. hidden[childname] = None _g_addChildrenNames = previous_api(_g_add_children_names) def _g_check_has_child(self, name): """Check whether 'name' is a children of 'self' and return its type.""" # Get the HDF5 name matching the PyTables name. node_type = self._g_get_objinfo(name) if node_type == "NoSuchNode": raise NoSuchNodeError( "group ``%s`` does not have a child named ``%s``" % (self._v_pathname, name)) return node_type _g_checkHasChild = previous_api(_g_check_has_child) def __iter__(self): """Iterate over the child nodes hanging directly from the group. This iterator is *not* recursive. Examples -------- :: # Non-recursively list all the nodes hanging from '/detector' print("Nodes in '/detector' group:") for node in h5file.root.detector: print(node) """ return self._f_iter_nodes() def __contains__(self, name): """Is there a child with that `name`? Returns a true value if the group has a child node (visible or hidden) with the given `name` (a string), false otherwise. """ self._g_check_open() try: self._g_check_has_child(name) except NoSuchNodeError: return False return True def _f_walknodes(self, classname=None): """Iterate over descendant nodes. This method recursively walks *self* top to bottom (preorder), iterating over child groups in alphanumerical order, and yielding nodes. If classname is supplied, only instances of the named class are yielded. If *classname* is Group, it behaves like :meth:`Group._f_walk_groups`, yielding only groups. If you don't want a recursive behavior, use :meth:`Group._f_iter_nodes` instead. Examples -------- :: # Recursively print all the arrays hanging from '/' print("Arrays in the object tree '/':") for array in h5file.root._f_walknodes('Array', recursive=True): print(array) """ self._g_check_open() # For compatibility with old default arguments. 
if classname == '': classname = None if classname == "Group": # Recursive algorithm for group in self._f_walk_groups(): yield group else: for group in self._f_walk_groups(): for leaf in group._f_iter_nodes(classname): yield leaf _f_walkNodes = previous_api(_f_walknodes) def _g_join(self, name): """Helper method to correctly concatenate a name child object with the pathname of this group.""" if name == "/": # This case can happen when doing copies return self._v_pathname return join_path(self._v_pathname, name) def _g_width_warning(self): """Issue a :exc:`PerformanceWarning` on too many children.""" warnings.warn("""\ group ``%s`` is exceeding the recommended maximum number of children (%d); \ be ready to see PyTables asking for *lots* of memory and possibly slow I/O.""" % (self._v_pathname, self._v_max_group_width), PerformanceWarning) _g_widthWarning = previous_api(_g_width_warning) def _g_refnode(self, childnode, childname, validate=True): """Insert references to a `childnode` via a `childname`. Checks that the `childname` is valid and does not exist, then creates references to the given `childnode` by that `childname`. The validation of the name can be omitted by setting `validate` to a false value (this may be useful for adding already existing nodes to the tree). """ # Check for name validity. if validate: check_name_validity(childname) childnode._g_check_name(childname) # Check if there is already a child with the same name. # This can be triggered because of the user # (via node construction or renaming/movement). # Links are not checked here because they are copied and referenced # using ``File.get_node`` so they already exist in `self`. if (not isinstance(childnode, Link)) and childname in self: raise NodeError( "group ``%s`` already has a child node named ``%s``" % (self._v_pathname, childname)) # Show a warning if there is an object attribute with that name. if childname in self.__dict__: warnings.warn( "group ``%s`` already has an attribute named ``%s``; " "you will not be able to use natural naming " "to access the child node" % (self._v_pathname, childname), NaturalNameWarning) # Check group width limits. if (len(self._v_children) + len(self._v_hidden) >= self._v_max_group_width): self._g_width_warning() # Update members information. # Insert references to the new child. # (Assigned values are entirely irrelevant.) if isvisiblename(childname): # Visible node. self.__members__.insert(0, childname) # enable completion self._v_children[childname] = None # insert node if isinstance(childnode, Unknown): self._v_unknown[childname] = None elif isinstance(childnode, Link): self._v_links[childname] = None elif isinstance(childnode, Leaf): self._v_leaves[childname] = None elif isinstance(childnode, Group): self._v_groups[childname] = None else: # Hidden node. self._v_hidden[childname] = None # insert node _g_refNode = previous_api(_g_refnode) def _g_unrefnode(self, childname): """Remove references to a node. Removes all references to the named node. """ # This can *not* be triggered because of the user. assert childname in self, \ ("group ``%s`` does not have a child node named ``%s``" % (self._v_pathname, childname)) # Update members information, if needed if '_v_children' in self.__dict__: if childname in self._v_children: # Visible node. 
members = self.__members__ member_index = members.index(childname) del members[member_index] # disables completion del self._v_children[childname] # remove node self._v_unknown.pop(childname, None) self._v_links.pop(childname, None) self._v_leaves.pop(childname, None) self._v_groups.pop(childname, None) else: # Hidden node. del self._v_hidden[childname] # remove node _g_unrefNode = previous_api(_g_unrefnode) def _g_move(self, newparent, newname): # Move the node to the new location. oldpath = self._v_pathname super(Group, self)._g_move(newparent, newname) newpath = self._v_pathname # Update location information in children. This node shouldn't # be affected since it has already been relocated. self._v_file._update_node_locations(oldpath, newpath) def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs): # Compute default arguments. title = kwargs.get('title', self._v_title) filters = kwargs.get('filters', None) stats = kwargs.get('stats', None) # Fix arguments with explicit None values for backwards compatibility. if title is None: title = self._v_title # If no filters have been passed to the call, copy them from the # source group, but only if inherited or explicitly set. if filters is None: filters = getattr(self._v_attrs, 'FILTERS', None) # Create a copy of the object. new_node = Group(newparent, newname, title, new=True, filters=filters, _log=_log) # Copy user attributes if needed. if kwargs.get('copyuserattrs', True): self._v_attrs._g_copy(new_node._v_attrs, copyclass=True) # Update statistics if needed. if stats is not None: stats['groups'] += 1 if recursive: # Copy child nodes if a recursive copy was requested. # Some arguments should *not* be passed to children copy ops. kwargs = kwargs.copy() kwargs.pop('title', None) self._g_copy_children(new_node, **kwargs) return new_node def _g_copy_children(self, newparent, **kwargs): """Copy child nodes. Copies all nodes descending from this one into the specified `newparent`. If the new parent has a child node with the same name as one of the nodes in this group, the copy fails with a `NodeError`, maybe resulting in a partial copy. Nothing is logged. """ # Recursive version of children copy. # for srcchild in self._v_children.itervalues(): ## srcchild._g_copy_as_child(newparent, **kwargs) # Non-recursive version of children copy. use_hardlinks = kwargs.get('use_hardlinks', False) if use_hardlinks: address_map = kwargs.setdefault('address_map', {}) parentstack = [(self, newparent)] # [(source, destination), ...] while parentstack: (srcparent, dstparent) = parentstack.pop() if use_hardlinks: for srcchild in srcparent._v_children.itervalues(): addr, rc = srcchild._get_obj_info() if rc > 1 and addr in address_map: where, name = address_map[addr][0] localsrc = os.path.join(where, name) dstparent._v_file.create_hard_link(dstparent, srcchild.name, localsrc) address_map[addr].append( (dstparent._v_pathname, srcchild.name) ) # Update statistics if needed. 
stats = kwargs.pop('stats', None) if stats is not None: stats['hardlinks'] += 1 else: dstchild = srcchild._g_copy_as_child(dstparent, **kwargs) if isinstance(srcchild, Group): parentstack.append((srcchild, dstchild)) if rc > 1: address_map[addr] = [ (dstparent._v_pathname, srcchild.name) ] else: for srcchild in srcparent._v_children.itervalues(): dstchild = srcchild._g_copy_as_child(dstparent, **kwargs) if isinstance(srcchild, Group): parentstack.append((srcchild, dstchild)) _g_copyChildren = previous_api(_g_copy_children) def _f_get_child(self, childname): """Get the child called childname of this group. If the child exists (be it visible or not), it is returned. Else, a NoSuchNodeError is raised. Using this method is recommended over getattr() when doing programmatic accesses to children if childname is unknown beforehand or when its name is not a valid Python identifier. """ self._g_check_open() self._g_check_has_child(childname) childpath = join_path(self._v_pathname, childname) return self._v_file._get_node(childpath) _f_getChild = previous_api(_f_get_child) def _f_list_nodes(self, classname=None): """Return a *list* with children nodes. This is a list-returning version of :meth:`Group._f_iter_nodes()`. """ return list(self._f_iter_nodes(classname)) _f_listNodes = previous_api(_f_list_nodes) def _f_iter_nodes(self, classname=None): """Iterate over children nodes. Child nodes are yielded alphanumerically sorted by node name. If the name of a class derived from Node (see :ref:`NodeClassDescr`) is supplied in the classname parameter, only instances of that class (or subclasses of it) will be returned. This is an iterator version of :meth:`Group._f_list_nodes`. """ self._g_check_open() if not classname: # Returns all the children alphanumerically sorted names = sorted(self._v_children.iterkeys()) for name in names: yield self._v_children[name] elif classname == 'Group': # Returns all the groups alphanumerically sorted names = sorted(self._v_groups.iterkeys()) for name in names: yield self._v_groups[name] elif classname == 'Leaf': # Returns all the leaves alphanumerically sorted names = sorted(self._v_leaves.iterkeys()) for name in names: yield self._v_leaves[name] elif classname == 'Link': # Returns all the links alphanumerically sorted names = sorted(self._v_links.iterkeys()) for name in names: yield self._v_links[name] elif classname == 'IndexArray': raise TypeError( "listing ``IndexArray`` nodes is not allowed") else: class_ = get_class_by_name(classname) children = self._v_children childnames = sorted(children.iterkeys()) for childname in childnames: childnode = children[childname] if isinstance(childnode, class_): yield childnode _f_iterNodes = previous_api(_f_iter_nodes) def _f_walk_groups(self): """Recursively iterate over descendent groups (not leaves). This method starts by yielding *self*, and then it goes on to recursively iterate over all child groups in alphanumerical order, top to bottom (preorder), following the same procedure. """ self._g_check_open() stack = [self] yield self # Iterate over the descendants while stack: objgroup = stack.pop() groupnames = sorted(objgroup._v_groups.iterkeys()) # Sort the groups before delivering. This uses the groups names # for groups in tree (in order to sort() can classify them). for groupname in groupnames: stack.append(objgroup._v_groups[groupname]) yield objgroup._v_groups[groupname] _f_walkGroups = previous_api(_f_walk_groups) def __delattr__(self, name): """Delete a Python attribute called name. 
This method deletes an *ordinary Python attribute* from the object. It does *not* remove children nodes from this group; for that, use :meth:`File.remove_node` or :meth:`Node._f_remove`. It does *neither* delete a PyTables node attribute; for that, use :meth:`File.del_node_attr`, :meth:`Node._f_delattr` or :attr:`Node._v_attrs``. If there is an attribute and a child node with the same name, the child node will be made accessible again via natural naming. """ try: super(Group, self).__delattr__(name) # nothing particular except AttributeError as ae: hint = " (use ``node._f_remove()`` if you want to remove a node)" raise ae.__class__(str(ae) + hint) def __getattr__(self, name): """Get a Python attribute or child node called name. If the object has a Python attribute called name, its value is returned. Else, if the node has a child node called name, it is returned. Else, an AttributeError is raised. """ # That is true since a `NoSuchNodeError` is an `AttributeError`. mydict = self.__dict__ if name in mydict: return mydict[name] elif name in self._c_lazy_children_attrs: self._g_add_children_names() return mydict[name] return self._f_get_child(name) def __setattr__(self, name, value): """Set a Python attribute called name with the given value. This method stores an *ordinary Python attribute* in the object. It does *not* store new children nodes under this group; for that, use the File.create*() methods (see the File class in :ref:`FileClassDescr`). It does *neither* store a PyTables node attribute; for that, use :meth:`File.set_node_attr`, :meth`:Node._f_setattr` or :attr:`Node._v_attrs`. If there is already a child node with the same name, a NaturalNameWarning will be issued and the child node will not be accessible via natural naming nor getattr(). It will still be available via :meth:`File.get_node`, :meth:`Group._f_get_child` and children dictionaries in the group (if visible). """ # Show a warning if there is an child node with that name. # # ..note:: # # Using ``if name in self:`` is not right since that would # require ``_v_children`` and ``_v_hidden`` to be already set # when the very first attribute assignments are made. # Moreover, this warning is only concerned about clashes with # names used in natural naming, i.e. those in ``__members__``. # # ..note:: # # The check ``'__members__' in myDict`` allows attribute # assignment to happen before calling `Group.__init__()`, by # avoiding to look into the still not assigned ``__members__`` # attribute. This allows subclasses to set up some attributes # and then call the constructor of the superclass. If the # check above is disabled, that results in Python entering an # endless loop on exit! mydict = self.__dict__ if '__members__' in mydict and name in self.__members__: warnings.warn( "group ``%s`` already has a child node named ``%s``; " "you will not be able to use natural naming " "to access the child node" % (self._v_pathname, name), NaturalNameWarning) super(Group, self).__setattr__(name, value) def _f_flush(self): """Flush this Group.""" self._g_check_open() self._g_flush_group() def _g_close_descendents(self): """Close all the *loaded* descendent nodes of this group.""" node_manager = self._v_file._node_manager node_manager.close_subtree(self._v_pathname) _g_closeDescendents = previous_api(_g_close_descendents) def _g_close(self): """Close this (open) group.""" if self._v_isopen: # hdf5extension operations: # Close HDF5 group. self._g_close_group() # Close myself as a node. 
super(Group, self)._f_close() def _f_close(self): """Close this group and all its descendents. This method has the behavior described in :meth:`Node._f_close`. It should be noted that this operation closes all the nodes descending from this group. You should not need to close nodes manually because they are automatically opened/closed when they are loaded/evicted from the integrated LRU cache. """ # If the group is already closed, return immediately if not self._v_isopen: return # First, close all the descendents of this group, unless a) the # group is being deleted (evicted from LRU cache) or b) the node # is being closed during an aborted creation, in which cases # this is not an explicit close issued by the user. if not (self._v__deleting or self._v_objectid is None): self._g_close_descendents() # When all the descendents have been closed, close this group. # This is done at the end because some nodes may still need to # be loaded during the closing process; thus this node must be # open until the very end. self._g_close() def _g_remove(self, recursive=False, force=False): """Remove (recursively if needed) the Group. This version correctly handles both visible and hidden nodes. """ if self._v_nchildren > 0: if not (recursive or force): raise NodeError("group ``%s`` has child nodes; " "please set `recursive` or `force` to true " "to remove it" % (self._v_pathname,)) # First close all the descendents hanging from this group, # so that it is not possible to use a node that no longer exists. self._g_close_descendents() # Remove the node itself from the hierarchy. super(Group, self)._g_remove(recursive, force) def _f_copy(self, newparent=None, newname=None, overwrite=False, recursive=False, createparents=False, **kwargs): """Copy this node and return the new one. This method has the behavior described in :meth:`Node._f_copy`. In addition, it recognizes the following keyword arguments: Parameters ---------- title The new title for the destination. If omitted or None, the original title is used. This only applies to the topmost node in recursive copies. filters : Filters Specifying this parameter overrides the original filter properties in the source node. If specified, it must be an instance of the Filters class (see :ref:`FiltersClassDescr`). The default is to copy the filter properties from the source node. copyuserattrs You can prevent the user attributes from being copied by setting thisparameter to False. The default is to copy them. stats This argument may be used to collect statistics on the copy process. When used, it should be a dictionary with keys 'groups', 'leaves', 'links' and 'bytes' having a numeric value. Their values willbe incremented to reflect the number of groups, leaves and bytes, respectively, that have been copied during the operation. """ return super(Group, self)._f_copy( newparent, newname, overwrite, recursive, createparents, **kwargs) def _f_copy_children(self, dstgroup, overwrite=False, recursive=False, createparents=False, **kwargs): """Copy the children of this group into another group. Children hanging directly from this group are copied into dstgroup, which can be a Group (see :ref:`GroupClassDescr`) object or its pathname in string form. If createparents is true, the needed groups for the given destination group path to exist will be created. 
The operation will fail with a NodeError if there is a child node in the destination group with the same name as one of the copied children from this one, unless overwrite is true; in this case, the former child node is recursively removed before copying the later. By default, nodes descending from children groups of this node are not copied. If the recursive argument is true, all descendant nodes of this node are recursively copied. Additional keyword arguments may be passed to customize the copying process. For instance, title and filters may be changed, user attributes may be or may not be copied, data may be sub-sampled, stats may be collected, etc. Arguments unknown to nodes are simply ignored. Check the documentation for copying operations of nodes to see which options they support. """ self._g_check_open() # `dstgroup` is used instead of its path to avoid accepting # `Node` objects when `createparents` is true. Also, note that # there is no risk of creating parent nodes and failing later # because of destination nodes already existing. dstparent = self._v_file._get_or_create_path(dstgroup, createparents) self._g_check_group(dstparent) # Is it a group? if not overwrite: # Abort as early as possible when destination nodes exist # and overwriting is not enabled. for childname in self._v_children: if childname in dstparent: raise NodeError( "destination group ``%s`` already has " "a node named ``%s``; " "you may want to use the ``overwrite`` argument" % (dstparent._v_pathname, childname)) use_hardlinks = kwargs.get('use_hardlinks', False) if use_hardlinks: address_map = kwargs.setdefault('address_map', {}) for child in self._v_children.itervalues(): addr, rc = child._get_obj_info() if rc > 1 and addr in address_map: where, name = address_map[addr][0] localsrc = os.path.join(where, name) dstparent._v_file.create_hard_link(dstparent, child.name, localsrc) address_map[addr].append( (dstparent._v_pathname, child.name) ) # Update statistics if needed. stats = kwargs.pop('stats', None) if stats is not None: stats['hardlinks'] += 1 else: child._f_copy(dstparent, None, overwrite, recursive, **kwargs) if rc > 1: address_map[addr] = [ (dstparent._v_pathname, child.name) ] else: for child in self._v_children.itervalues(): child._f_copy(dstparent, None, overwrite, recursive, **kwargs) _f_copyChildren = previous_api(_f_copy_children) def __str__(self): """Return a short string representation of the group. Examples -------- :: >>> f=tables.open_file('data/test.h5') >>> print(f.root.group0) /group0 (Group) 'First Group' """ pathname = self._v_pathname classname = self.__class__.__name__ title = self._v_title return "%s (%s) %r" % (pathname, classname, title) def __repr__(self): """Return a detailed string representation of the group. Examples -------- :: >>> f = tables.open_file('data/test.h5') >>> f.root.group0 /group0 (Group) 'First Group' children := ['tuple1' (Table), 'group1' (Group)] """ rep = [ '%r (%s)' % (childname, child.__class__.__name__) for (childname, child) in self._v_children.iteritems() ] childlist = '[%s]' % (', '.join(rep)) return "%s\n children := %s" % (str(self), childlist)
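# Example (illustrative only; not part of the original module): a short
# sketch of how the Group navigation methods defined above are used.  The
# file layout is invented.
#
#   import tables
#
#   with tables.open_file("group_demo.h5", mode="w") as h5file:
#       detector = h5file.create_group("/", "detector", "Detector info")
#       h5file.create_array(detector, "readout", [1, 2, 3])
#
#       # Natural naming and explicit child lookup reach the same node.
#       readout = detector._f_get_child("readout")
#       print(h5file.root.detector.readout)
#
#       # Recursively walk groups, then list the leaves in each one.
#       for group in h5file.root._f_walk_groups():
#           for leaf in group._f_list_nodes("Leaf"):
#               print(group._v_pathname, "->", leaf.name)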
class Enum(object): """Enumerated type. Each instance of this class represents an enumerated type. The values of the type must be declared *exhaustively* and named with *strings*, and they might be given explicit concrete values, though this is not compulsory. Once the type is defined, it can not be modified. There are three ways of defining an enumerated type. Each one of them corresponds to the type of the only argument in the constructor of Enum: - *Sequence of names*: each enumerated value is named using a string, and its order is determined by its position in the sequence; the concrete value is assigned automatically:: >>> boolEnum = Enum(['True', 'False']) - *Mapping of names*: each enumerated value is named by a string and given an explicit concrete value. All of the concrete values must be different, or a ValueError will be raised:: >>> priority = Enum({'red': 20, 'orange': 10, 'green': 0}) >>> colors = Enum({'red': 1, 'blue': 1}) Traceback (most recent call last): ... ValueError: enumerated values contain duplicate concrete values: 1 - *Enumerated type*: in that case, a copy of the original enumerated type is created. Both enumerated types are considered equal:: >>> prio2 = Enum(priority) >>> priority == prio2 True Please note that names starting with _ are not allowed, since they are reserved for internal usage:: >>> prio2 = Enum(['_xx']) Traceback (most recent call last): ... ValueError: name of enumerated value can not start with ``_``: '_xx' The concrete value of an enumerated value is obtained by getting its name as an attribute of the Enum instance (see __getattr__()) or as an item (see __getitem__()). This allows comparisons between enumerated values and assigning them to ordinary Python variables:: >>> redv = priority.red >>> redv == priority['red'] True >>> redv > priority.green True >>> priority.red == priority.orange False The name of the enumerated value corresponding to a concrete value can also be obtained by using the __call__() method of the enumerated type. In this way you get the symbolic name to use it later with __getitem__():: >>> priority(redv) 'red' >>> priority.red == priority[priority(priority.red)] True (If you ask, the __getitem__() method is not used for this purpose to avoid ambiguity in the case of using strings as concrete values.) """ def __init__(self, enum): mydict = self.__dict__ mydict['_names'] = {} mydict['_values'] = {} if isinstance(enum, list) or isinstance(enum, tuple): for (value, name) in enumerate(enum): # values become 0, 1, 2... self._check_and_set_pair(name, value) elif isinstance(enum, dict): for (name, value) in enum.iteritems(): self._check_and_set_pair(name, value) elif isinstance(enum, Enum): for (name, value) in enum._names.iteritems(): self._check_and_set_pair(name, value) else: raise TypeError("""\ enumerations can only be created from \ sequences, mappings and other enumerations""") def _check_and_set_pair(self, name, value): """Check validity of enumerated value and insert it into type.""" names = self._names values = self._values if not isinstance(name, basestring): raise TypeError("name of enumerated value is not a string: %r" % (name, )) if name.startswith('_'): raise ValueError( "name of enumerated value can not start with ``_``: %r" % name) # This check is only necessary with a sequence base object. if name in names: raise ValueError("enumerated values contain duplicate names: %r" % name) # This check is only necessary with a mapping base object. 
if value in values: raise ValueError( "enumerated values contain duplicate concrete values: %r" % value) names[name] = value values[value] = name self.__dict__[name] = value _checkAndSetPair = previous_api(_check_and_set_pair) def __getitem__(self, name): """Get the concrete value of the enumerated value with that name. The name of the enumerated value must be a string. If there is no value with that name in the enumeration, a KeyError is raised. Examples -------- Let ``enum`` be an enumerated type defined as: >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5}) then: >>> enum['T1'] 2 >>> enum['foo'] Traceback (most recent call last): ... KeyError: "no enumerated value with that name: 'foo'" """ try: return self._names[name] except KeyError: raise KeyError("no enumerated value with that name: %r" % (name, )) def __setitem__(self, name, value): """This operation is forbidden.""" raise IndexError("operation not allowed") def __delitem__(self, name): """This operation is forbidden.""" raise IndexError("operation not allowed") def __getattr__(self, name): """Get the concrete value of the enumerated value with that name. The name of the enumerated value must be a string. If there is no value with that name in the enumeration, an AttributeError is raised. Examples -------- Let ``enum`` be an enumerated type defined as: >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5}) then: >>> enum.T1 2 >>> enum.foo Traceback (most recent call last): ... AttributeError: no enumerated value with that name: 'foo' """ try: return self[name] except KeyError as ke: raise AttributeError(*ke.args) def __setattr__(self, name, value): """This operation is forbidden.""" raise AttributeError("operation not allowed") def __delattr__(self, name): """This operation is forbidden.""" raise AttributeError("operation not allowed") def __contains__(self, name): """Is there an enumerated value with that name in the type? If the enumerated type has an enumerated value with that name, True is returned. Otherwise, False is returned. The name must be a string. This method does *not* check for concrete values matching a value in an enumerated type. For that, please use the :meth:`Enum.__call__` method. Examples -------- Let ``enum`` be an enumerated type defined as: >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5}) then: >>> 'T1' in enum True >>> 'foo' in enum False >>> 0 in enum Traceback (most recent call last): ... TypeError: name of enumerated value is not a string: 0 >>> enum.T1 in enum # Be careful with this! Traceback (most recent call last): ... TypeError: name of enumerated value is not a string: 2 """ if not isinstance(name, basestring): raise TypeError("name of enumerated value is not a string: %r" % (name, )) return name in self._names def __call__(self, value, *default): """Get the name of the enumerated value with that concrete value. If there is no value with that concrete value in the enumeration and a second argument is given as a default, this is returned. Else, a ValueError is raised. This method can be used for checking that a concrete value belongs to the set of concrete values in an enumerated type. Examples -------- Let ``enum`` be an enumerated type defined as: >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5}) then: >>> enum(5) 'T2' >>> enum(42, None) is None True >>> enum(42) Traceback (most recent call last): ... 
ValueError: no enumerated value with that concrete value: 42 """ try: return self._values[value] except KeyError: if len(default) > 0: return default[0] raise ValueError( "no enumerated value with that concrete value: %r" % (value, )) def __len__(self): """Return the number of enumerated values in the enumerated type. Examples -------- >>> len(Enum(['e%d' % i for i in range(10)])) 10 """ return len(self._names) def __iter__(self): """Iterate over the enumerated values. Enumerated values are returned as (name, value) pairs *in no particular order*. Examples -------- >>> enumvals = {'red': 4, 'green': 2, 'blue': 1} >>> enum = Enum(enumvals) >>> enumdict = dict([(name, value) for (name, value) in enum]) >>> enumvals == enumdict True """ for name_value in self._names.iteritems(): yield name_value def __eq__(self, other): """Is the other enumerated type equivalent to this one? Two enumerated types are equivalent if they have exactly the same enumerated values (i.e. with the same names and concrete values). Examples -------- Let ``enum*`` be enumerated types defined as: >>> enum1 = Enum({'T0': 0, 'T1': 2}) >>> enum2 = Enum(enum1) >>> enum3 = Enum({'T1': 2, 'T0': 0}) >>> enum4 = Enum({'T0': 0, 'T1': 2, 'T2': 5}) >>> enum5 = Enum({'T0': 0}) >>> enum6 = Enum({'T0': 10, 'T1': 20}) then: >>> enum1 == enum1 True >>> enum1 == enum2 == enum3 True >>> enum1 == enum4 False >>> enum5 == enum1 False >>> enum1 == enum6 False Comparing enumerated types with other kinds of objects produces a false result: >>> enum1 == {'T0': 0, 'T1': 2} False >>> enum1 == ['T0', 'T1'] False >>> enum1 == 2 False """ if not isinstance(other, Enum): return False return self._names == other._names def __ne__(self, other): """Is the `other` enumerated type different from this one? Two enumerated types are different if they don't have exactly the same enumerated values (i.e. with the same names and concrete values). Examples -------- Let ``enum*`` be enumerated types defined as: >>> enum1 = Enum({'T0': 0, 'T1': 2}) >>> enum2 = Enum(enum1) >>> enum3 = Enum({'T1': 2, 'T0': 0}) >>> enum4 = Enum({'T0': 0, 'T1': 2, 'T2': 5}) >>> enum5 = Enum({'T0': 0}) >>> enum6 = Enum({'T0': 10, 'T1': 20}) then: >>> enum1 != enum1 False >>> enum1 != enum2 != enum3 False >>> enum1 != enum4 True >>> enum5 != enum1 True >>> enum1 != enum6 True """ return not self.__eq__(other) # XXX: API incompatible change for PyTables 3 line # Overriding __eq__ blocks inheritance of __hash__ in 3.x # def __hash__(self): # return hash((self.__class__, tuple(self._names.items()))) def __repr__(self): """Return the canonical string representation of the enumeration. The output of this method can be evaluated to give a new enumeration object that will compare equal to this one. Examples -------- >>> repr(Enum({'name': 10})) "Enum({'name': 10})" """ return 'Enum(%s)' % self._names
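# Example (illustrative only; not part of the original module): Enum
# instances are typically consumed through ``EnumCol`` when declaring a
# table column.  Column and file names are invented; ``base='uint8'`` is
# just one possible storage type.
#
#   import tables
#
#   colors = tables.Enum(['red', 'green', 'blue'])
#   description = {'color': tables.EnumCol(colors, 'red', base='uint8')}
#
#   with tables.open_file("enum_demo.h5", mode="w") as h5file:
#       table = h5file.create_table("/", "samples", description)
#       row = table.row
#       row['color'] = colors.green
#       row.append()
#       table.flush()
#
#       stored = table[0]['color']
#       print(colors(stored))        # -> 'green'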