Example #1
        redo_add_attr(file_, args[0], args[1])
    elif operation == 'DELATTR':
        redo_del_attr(file_, args[0], args[1])
    else:
        raise NotImplementedError("the requested unknown operation %r cannot "
                                  "be redone; please report this to the "
                                  "authors" % operation)


def move_to_shadow(file_, path):
    node = file_._get_node(path)

    (shparent, shname) = file_._shadow_name()
    node._g_move(shparent, shname)

moveToShadow = previous_api(move_to_shadow)


def move_from_shadow(file_, path):
    (shparent, shname) = file_._shadow_name()
    node = shparent._f_get_child(shname)

    (pname, name) = split_path(path)
    parent = file_._get_node(pname)
    node._g_move(parent, name)

moveFromShadow = previous_api(move_from_shadow)


def undo_create(file_, path):
    move_to_shadow(file_, path)
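# A hedged usage sketch: the helpers above are internal, but they are what
# File.undo()/File.redo() end up invoking when the action log is replayed.
# The file name below is illustrative only:
#
#     import tables
#     f = tables.open_file('undo-demo.h5', 'w')
#     f.enable_undo()                      # start logging actions
#     f.create_array('/', 'x', [1, 2, 3])
#     f.undo()    # undo_create() parks '/x' via move_to_shadow()
#     f.redo()    # move_from_shadow() brings '/x' back
#     f.disable_undo()
#     f.close()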
Example #2
        chunksize, slicesize = ccs_ultralight(optlevel, chunksize, slicesize)
    elif indsize == 2:  # light
        chunksize, slicesize = ccs_light(optlevel, chunksize, slicesize)
    elif indsize == 4:  # medium
        chunksize, slicesize = ccs_medium(optlevel, chunksize, slicesize)
    elif indsize == 8:  # full
        chunksize, slicesize = ccs_full(optlevel, chunksize, slicesize)

    # Finally, compute blocksize and superblocksize
    blocksize = computeblocksize(expectedrows, slicesize, chunksize)
    superblocksize = computeblocksize(expectedrows, blocksize, slicesize)
    # The sizes for the different blocks of information
    sizes = (superblocksize, blocksize, slicesize, chunksize)
    return sizes

calcChunksize = previous_api(calc_chunksize)


def ccs_ultralight(optlevel, chunksize, slicesize):
    """Correct the slicesize and the chunksize based on optlevel."""

    if optlevel in (0, 1, 2):
        slicesize //= 2
        slicesize += optlevel * slicesize
    elif optlevel in (3, 4, 5, 6, 7, 8, 9):
        # All the remaining levels scale the slice by (optlevel - 1).
        slicesize *= optlevel - 1
    return chunksize, slicesize
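# A quick sanity check of the arithmetic above (the starting values are
# illustrative): optlevels 0-2 halve the slice and then scale it by
# (optlevel + 1), while optlevels >= 3 multiply it by (optlevel - 1).
if __name__ == '__main__':
    for opt in (0, 1, 2, 3, 9):
        print(opt, ccs_ultralight(opt, chunksize=32, slicesize=200))
    # -> (32, 100), (32, 200), (32, 300), (32, 400), (32, 1600)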
Example #3
        # We need to use this strange way to obtain a dtype compliant
        # array because NumPy doesn't honor the shape of the dtype when
        # it is multidimensional.  See:
        # http://scipy.org/scipy/numpy/ticket/926
        # for details.
        # All of this is done just to take advantage of the NumPy
        # broadcasting rules.
        newshape = nparr.shape[: -len(atom.dtype.shape)]
        nparr2 = numpy.empty(newshape, dtype=[("", atom.dtype)])
        nparr2["f0"][:] = nparr
        # Return a view (i.e. get rid of the record type)
        nparr = nparr2.view(atom.dtype)
    return nparr


convertToNPAtom = previous_api(convert_to_np_atom)
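# A minimal illustration of why the record-dtype detour above is needed
# (the dtype here is an assumption for demonstration): NumPy folds a
# multidimensional dtype's shape into the array shape, silently dropping
# the atom's trailing dimensions.
import numpy
atom_dtype = numpy.dtype(('int32', (2,)))
plain = numpy.empty((3,), dtype=atom_dtype)
# plain.shape == (3, 2) and plain.dtype == int32: the (2,)-shaped atom
# dtype is gone, which is exactly what the single-field wrapper avoids.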


# The next is used in Array, EArray and VLArray, and it is a bit more
# high level than convert_to_np_atom
def convert_to_np_atom2(object, atom):
    """Convert a generic object into a NumPy object compliant with atom."""

    # Check whether the object needs to be copied to make the operation
    # safe for in-place conversion.
    copy = atom.type in ["time64"]
    nparr = convert_to_np_atom(object, atom, copy)
    # Finally, check the byteorder and change it if needed
    byteorder = byteorders[nparr.dtype.byteorder]
    if byteorder in ["little", "big"] and byteorder != sys.byteorder:
        # The byteorder needs to be fixed (a copy is made
Example #4
                print("[%s] %s" % (i, leaf[i]))

    if isinstance(leaf, Table) and options.colinfo:
        # Show info of columns
        for colname in leaf.colnames:
            print(repr(leaf.cols._f_col(colname)))

    if isinstance(leaf, Table) and options.idxinfo:
        # Show info of indexes
        for colname in leaf.colnames:
            col = leaf.cols._f_col(colname)
            if isinstance(col, Column) and col.index is not None:
                idx = col.index
                print(repr(idx))

dumpLeaf = previous_api(dump_leaf)


def dump_group(pgroup, sort=False):
    node_kinds = pgroup._v_file._node_kinds[1:]
    what = pgroup._f_walk_groups()
    if sort:
        what = sorted(what, key=operator.attrgetter('_v_pathname'))
    for group in what:
        print(str(group))
        if options.showattrs:
            print("  "+repr(group._v_attrs))
        for kind in node_kinds:
            for node in group._f_list_nodes(kind):
                if options.verbose or options.dump:
                    dump_leaf(node)
Example #5
        if nparr.shape != slice_shape:
            # Create an array compliant with the specified shape
            narr = numpy.empty(shape=slice_shape, dtype=self.atom.dtype)
            # Assign the value to it
            try:
                narr[...] = nparr
            except Exception as exc:  # XXX
                raise ValueError("value parameter '%s' cannot be converted "
                                 "into an array object compliant with %s: "
                                 "'%r' The error was: <%s>" % (
                                 nparr, self.__class__.__name__, self, exc))
            return narr
        return nparr

    _checkShape = previous_api(_check_shape)
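    # The broadcast performed by _check_shape() above, sketched with plain
    # NumPy (shapes are illustrative): a scalar or a smaller, compatible
    # array is expanded to fill the requested slice shape:
    #
    #     narr = numpy.empty(shape=(2, 3), dtype='int32')
    #     narr[...] = 7                # a scalar fills the whole slice
    #     narr[...] = numpy.arange(3)  # a row broadcasts along axis 0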

    def _read_slice(self, startl, stopl, stepl, shape):
        """Read a slice based on `startl`, `stopl` and `stepl`."""

        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._g_read_slice(startl, stopl, stepl, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    _readSlice = previous_api(_read_slice)
    """Get the node class matching the `classname`.

    If the name is not registered, a ``TypeError`` is raised.  The empty
    string and ``None`` are also accepted, and mean the ``Node`` class.

    .. versionadded:: 3.0

    """

    # The empty string is accepted for compatibility
    # with old default arguments.
    if classname is None or classname == "":
        classname = "Node"

    # Get the class object corresponding to `classname`.
    if classname not in class_name_dict:
        raise TypeError("there is no registered node class named ``%s``" % (classname,))

    return class_name_dict[classname]


getClassByName = previous_api(get_class_by_name)
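# A hedged usage sketch: ``class_name_dict`` is the registry that maps
# registered node class names to their classes, so lookups behave like:
#
#     get_class_by_name('Group')       # -> the Group class
#     get_class_by_name(None)          # -> the Node base class
#     get_class_by_name('NoSuchName')  # raises TypeError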


Example #7
        # Close everything else (i.e. indices)
        closenodes(prefix, [path for path in alivenodes],
                   lambda path: alivenodes[path])

        # Next, revive the dead nodes, close and delete them
        # so they are not placed in the limbo again.
        # These two steps ensure tables are closed *before* their indices.
        closenodes(
            prefix,
            [path for path in deadnodes if '/_i_' not in path],  # not indices
            lambda path: revivenode(path))
        # Close everything else (i.e. indices)
        closenodes(prefix, [path for path in deadnodes],
                   lambda path: revivenode(path))

    _g_closeDescendents = previous_api(_g_close_descendents)
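    # The path filter above, in isolation (paths are illustrative): index
    # groups live under '/_i_...' subgroups, so excluding them in the
    # first pass closes tables before their indices:
    #
    #     paths = ['/t1', '/_i_t1/sorted', '/t2']
    #     [p for p in paths if '/_i_' not in p]  # -> ['/t1', '/t2']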

    def _g_close(self):
        """Close this (open) group."""

        # hdf5extension operations:
        #   Close HDF5 group.
        self._g_close_group()

        # Close myself as a node.
        super(Group, self)._f_close()

    def _f_close(self):
        """Close this group and all its descendents.

        This method has the behavior described in :meth:`Node._f_close`.
Example #8
class AttributeSet(hdf5extension.AttributeSet, object):
    """Container for the HDF5 attributes of a Node.

    This class provides methods to create new HDF5 node attributes,
    and to get, rename or delete existing ones.

    Like in Group instances (see :ref:`GroupClassDescr`), AttributeSet
    instances make use of the *natural naming* convention, i.e. you can
    access the attributes on disk as if they were normal Python
    attributes of the AttributeSet instance.

    This offers the user a very convenient way to access HDF5 node
    attributes. However, for this reason and in order not to pollute the
    object namespace, one cannot assign *normal* attributes to
    AttributeSet instances, and their members use names which start with
    special prefixes, as happens with Group objects.

    .. rubric:: Notes on native and pickled attributes

    The values of most basic types are saved as HDF5 native data in the
    HDF5 file.  This includes Python bool, int, float, complex and str
    (but neither long nor unicode) values, as well as their NumPy scalar
    versions and homogeneous or *structured* NumPy arrays of them.  When
    read, these values are always loaded as NumPy scalar or array
    objects, as needed.

    For that reason, attributes in native HDF5 files will always be
    mapped into NumPy objects.  Specifically, a multidimensional
    attribute will be mapped into a multidimensional ndarray and a
    scalar will be mapped into a NumPy scalar object (for example, a
    scalar H5T_NATIVE_LLONG will be read and returned as a numpy.int64
    scalar).

    However, other kinds of values are serialized using pickle, so you
    will only be able to retrieve them correctly using a Python-aware
    HDF5 library.  Thus, if you want to save Python scalar values and
    make sure you are able to read them with generic HDF5 tools, you
    should make use of *scalar or homogeneous/structured array NumPy
    objects* (for example, numpy.int64(1) or numpy.array([1, 2, 3],
    dtype='int16')).

    One more piece of advice: because of the various potential difficulties in
    restoring a Python object stored in an attribute, you may end up
    getting a pickle string where a Python object is expected. If this
    is the case, you may wish to run pickle.loads() on that string to
    get an idea of where things went wrong, as shown in this example::

        >>> import os, tempfile
        >>> import tables
        >>>
        >>> class MyClass(object):
        ...   foo = 'bar'
        ...
        >>> myObject = MyClass()  # save object of custom class in HDF5 attr
        >>> h5fname = tempfile.mktemp(suffix='.h5')
        >>> h5f = tables.open_file(h5fname, 'w')
        >>> h5f.root._v_attrs.obj = myObject  # store the object
        >>> print(h5f.root._v_attrs.obj.foo)  # retrieve it
        bar
        >>> h5f.close()
        >>>
        >>> del MyClass, myObject  # delete class of object and reopen file
        >>> h5f = tables.open_file(h5fname, 'r')
        >>> print(repr(h5f.root._v_attrs.obj))
        'ccopy_reg\\n_reconstructor...
        >>> import pickle  # let's unpickle that to see what went wrong
        >>> pickle.loads(h5f.root._v_attrs.obj)
        Traceback (most recent call last):
        ...
        AttributeError: 'module' object has no attribute 'MyClass'
        >>> # So the problem was not in the stored object,
        ... # but in the *environment* where it was restored.
        ... h5f.close()
        >>> os.remove(h5fname)


    .. rubric:: Notes on AttributeSet methods

    Note that this class overrides the __getattr__(), __setattr__() and
    __delattr__() special methods.  This allows you to read, assign or
    delete attributes on disk by simply using the following constructs::

        leaf.attrs.myattr = 'str attr'    # set a string (native support)
        leaf.attrs.myattr2 = 3            # set an integer (native support)
        leaf.attrs.myattr3 = [3, (1, 2)]  # a generic object (Pickled)
        attrib = leaf.attrs.myattr        # get the attribute ``myattr``
        del leaf.attrs.myattr             # delete the attribute ``myattr``

    In addition, the dictionary-like __getitem__(), __setitem__() and
    __delitem__() methods are available, so you may write things like
    this::

        for name in node._v_attrs._f_list():
            print("name: %s, value: %s" % (name, node._v_attrs[name]))

    Use whatever idiom you prefer to access the attributes.

    If an attribute is set on a target node that already has a large
    number of attributes, a PerformanceWarning will be issued.


    .. rubric:: AttributeSet attributes

    .. attribute:: _v_attrnames

        A list with all attribute names.

    .. attribute:: _v_attrnamessys

        A list with system attribute names.

    .. attribute:: _v_attrnamesuser

        A list with user attribute names.

    .. attribute:: _v_unimplemented

        A list of attribute names with unimplemented native HDF5 types.

    """
    def _g_getnode(self):
        return self._v__nodefile._get_node(self._v__nodepath)

    _v_node = property(
        _g_getnode, None, None,
        "The :class:`Node` instance this attribute set is "
        "associated with.")

    def __init__(self, node):
        """Create the basic structures to keep the attribute information.

        Reads all the HDF5 attributes (if any) on disk for the node "node".

        Parameters
        ----------
        node
            The parent node

        """

        # Refuse to create an instance of an already closed node
        if not node._v_isopen:
            raise ClosedNodeError("the node for attribute set is closed")

        dict_ = self.__dict__

        self._g_new(node)
        dict_["_v__nodefile"] = node._v_file
        dict_["_v__nodepath"] = node._v_pathname
        dict_["_v_attrnames"] = self._g_list_attr(node)
        # The list of unimplemented attribute names
        dict_["_v_unimplemented"] = []

        # Get the file version format. This is an optimization
        # in order to avoid accessing it too much.
        try:
            format_version = node._v_file.format_version
        except AttributeError:
            parsed_version = None
        else:
            if format_version == 'unknown':
                parsed_version = None
            else:
                parsed_version = tuple(map(int, format_version.split('.')))
        dict_["_v__format_version"] = parsed_version
        # Split the attribute list in system and user lists
        dict_["_v_attrnamessys"] = []
        dict_["_v_attrnamesuser"] = []
        for attr in self._v_attrnames:
            # put the attributes on the local dictionary to allow
            # tab-completion
            self.__getattr__(attr)
            if issysattrname(attr):
                self._v_attrnamessys.append(attr)
            else:
                self._v_attrnamesuser.append(attr)

        # Sort the attributes
        self._v_attrnames.sort()
        self._v_attrnamessys.sort()
        self._v_attrnamesuser.sort()

    def _g_update_node_location(self, node):
        """Updates the location information about the associated `node`."""

        dict_ = self.__dict__
        dict_['_v__nodefile'] = node._v_file
        dict_['_v__nodepath'] = node._v_pathname
        # hdf5extension operations:
        self._g_new(node)

    _g_updateNodeLocation = previous_api(_g_update_node_location)

    def _f_list(self, attrset='user'):
        """Get a list of attribute names.

        The attrset string selects the attribute set to be used.  A
        'user' value returns only user attributes (this is the default).
        A 'sys' value returns only system attributes.  Finally, 'all'
        returns both system and user attributes.

        """

        if attrset == "user":
            return self._v_attrnamesuser[:]
        elif attrset == "sys":
            return self._v_attrnamessys[:]
        elif attrset == "all":
            return self._v_attrnames[:]
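    # A hedged usage sketch (assuming ``node`` is an open node with some
    # attributes set):
    #
    #     node._v_attrs._f_list()        # user attribute names only
    #     node._v_attrs._f_list('sys')   # system attribute names only
    #     node._v_attrs._f_list('all')   # both lists merged, sorted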

    def __getattr__(self, name):
        """Get the attribute named "name"."""

        # If attribute does not exist, raise AttributeError
        if name not in self._v_attrnames:
            raise AttributeError("Attribute '%s' does not exist in node: "
                                 "'%s'" % (name, self._v__nodepath))

        # Read the attribute from disk.  This is an optimization for
        # quickly reading system attributes that are _string_ values, but
        # it also takes care of other types, for example NROWS for Tables
        # and EXTDIM for EArrays
        format_version = self._v__format_version
        value = self._g_getattr(self._v_node, name)

        # Check whether the value is pickled
        # Pickled values always seem to end with a "."
        maybe_pickled = (
            isinstance(value, numpy.generic) and  # NumPy scalar?
            value.dtype.type == numpy.bytes_ and  # string type?
            value.itemsize > 0 and value.endswith(b'.'))

        if (maybe_pickled and value in [b"0", b"0."]):
            # Workaround for a bug in many versions of Python (starting
            # somewhere after Python 2.6.1).  See ticket #253.
            retval = value
        elif (maybe_pickled and _field_fill_re.match(name)
              and format_version == (1, 5)):
            # This format was used during the first 1.2 releases, just
            # for string defaults.
            try:
                retval = cPickle.loads(value)
                retval = numpy.array(retval)
            except ImportError:
                retval = None  # signal error avoiding exception
        elif maybe_pickled and name == 'FILTERS' and format_version < (2, 0):
            # This is a big hack, but we don't have other way to recognize
            # pickled filters of PyTables 1.x files.
            value = _old_filters_re.sub(_new_filters_sub, value, 1)
            retval = cPickle.loads(value)  # pass unpickling errors through
        elif maybe_pickled:
            try:
                retval = cPickle.loads(value)
            # except cPickle.UnpicklingError:
            # It seems that pickle may raise other errors than UnpicklingError
            # Perhaps it would be better just an "except:" clause?
            # except (cPickle.UnpicklingError, ImportError):
            # Definitely (see SF bug #1254636)
            except:
                # ivb (2005-09-07): It is too hard to tell
                # whether the unpickling failed
                # because of the string not being a pickle one at all,
                # because of a malformed pickle string,
                # or because of some other problem in object reconstruction,
                # thus making inconvenient even the issuing of a warning here.
                # The documentation contains a note on this issue,
                # explaining how the user can tell where the problem was.
                retval = value
            # Additional check for allowing a workaround for #307
            if isinstance(retval, unicode) and retval == u'':
                retval = numpy.array(retval)[()]
        elif name == 'FILTERS' and format_version >= (2, 0):
            retval = Filters._unpack(value)
        elif name == 'TITLE' and not isinstance(value, str):
            if sys.version_info[0] < 3:
                # unicode is OK for TITLE
                retval = value
            else:
                retval = value.decode('utf-8')
        elif (issysattrname(name) and isinstance(value, (bytes, unicode))
              and not isinstance(value, str)
              and not _field_fill_re.match(name)):
            # system attributes should always be str
            if sys.version_info[0] < 3:
                retval = value.encode()
            else:
                # python 3, bytes and not "FIELD_[0-9]+_FILL"
                retval = value.decode('utf-8')
        else:
            retval = value

        # Put this value in the local dictionary
        self.__dict__[name] = retval
        return retval
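    # Background for the ``maybe_pickled`` heuristic above: every pickle
    # protocol terminates its payload with the STOP opcode b'.', e.g.
    # pickle.dumps(42, 0) == b'I42\n.', so bytes scalars ending in b'.'
    # are the only candidates worth trying through loads().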

    def _g__setattr(self, name, value):
        """Set a PyTables attribute.

        Sets a (maybe new) PyTables attribute with the specified `name`
        and `value`.  If the attribute already exists, it is simply
        replaced.

        It does not log the change.

        """

        # Save this attribute to disk
        # (overwriting an existing one if needed)
        stvalue = value
        if issysattrname(name):
            if name in ["EXTDIM", "AUTO_INDEX", "DIRTY", "NODE_TYPE_VERSION"]:
                stvalue = numpy.array(value, dtype=numpy.int32)
                value = stvalue[()]
            elif name == "NROWS":
                stvalue = numpy.array(value, dtype=SizeType)
                value = stvalue[()]
            elif name == "FILTERS" and self._v__format_version >= (2, 0):
                stvalue = value._pack()
                # value will remain as a Filters instance here
        # Convert value from a Python scalar into a NumPy scalar
        # (only in case it has not been converted yet)
        # Fixes ticket #59
        if (stvalue is value
                and type(value) in (bool, bytes, int, float, complex, unicode,
                                    numpy.unicode_)):
            # Additional check for allowing a workaround for #307
            if isinstance(value, unicode) and len(value) == 0:
                stvalue = numpy.array(u'')
            else:
                stvalue = numpy.array(value)
            value = stvalue[()]

        self._g_setattr(self._v_node, name, stvalue)

        # New attribute or value. Introduce it into the local
        # dictionary
        self.__dict__[name] = value

        # Finally, add this attribute to the list if not present
        attrnames = self._v_attrnames
        if name not in attrnames:
            attrnames.append(name)
            attrnames.sort()
            if issysattrname(name):
                attrnamessys = self._v_attrnamessys
                attrnamessys.append(name)
                attrnamessys.sort()
            else:
                attrnamesuser = self._v_attrnamesuser
                attrnamesuser.append(name)
                attrnamesuser.sort()
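    # Effect of the scalar conversion above (values illustrative): setting
    # an attribute to a plain Python ``3`` stores ``numpy.array(3)`` on
    # disk and keeps the equivalent NumPy scalar in the local __dict__, so
    # the attribute round-trips as a NumPy object rather than a Python int.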

    def __setattr__(self, name, value):
        """Set a PyTables attribute.

        Sets a (maybe new) PyTables attribute with the specified `name`
        and `value`.  If the attribute already exists, it is simply
        replaced.

        A ``ValueError`` is raised when the name starts with a reserved
        prefix or contains a ``/``.  A `NaturalNameWarning` is issued if
        the name is not a valid Python identifier.  A
        `PerformanceWarning` is issued when the recommended maximum
        number of attributes in a node is going to be exceeded.

        """

        nodefile = self._v__nodefile
        attrnames = self._v_attrnames

        # Check for name validity
        check_name_validity(name)

        nodefile._check_writable()

        # Check if there are too many attributes.
        max_node_attrs = nodefile.params['MAX_NODE_ATTRS']
        if len(attrnames) >= max_node_attrs:
            warnings.warn(
                """\
node ``%s`` is exceeding the recommended maximum number of attributes (%d);\
be ready to see PyTables asking for *lots* of memory and possibly slow I/O""" %
                (self._v__nodepath, max_node_attrs), PerformanceWarning)

        undo_enabled = nodefile.is_undo_enabled()
        # Log old attribute removal (if any).
        if undo_enabled and (name in attrnames):
            self._g_del_and_log(name)

        # Set the attribute.
        self._g__setattr(name, value)

        # Log new attribute addition.
        if undo_enabled:
            self._g_log_add(name)

    def _g_log_add(self, name):
        self._v__nodefile._log('ADDATTR', self._v__nodepath, name)

    _g_logAdd = previous_api(_g_log_add)

    def _g_del_and_log(self, name):
        nodefile = self._v__nodefile
        node_pathname = self._v__nodepath
        # Log *before* moving to use the right shadow name.
        nodefile._log('DELATTR', node_pathname, name)
        attr_to_shadow(nodefile, node_pathname, name)

    _g_delAndLog = previous_api(_g_del_and_log)

    def _g__delattr(self, name):
        """Delete a PyTables attribute.

        Deletes the specified existing PyTables attribute.

        It does not log the change.

        """

        # Delete the attribute from disk
        self._g_remove(self._v_node, name)

        # Delete the attribute from local lists
        self._v_attrnames.remove(name)
        if name in self._v_attrnamessys:
            self._v_attrnamessys.remove(name)
        else:
            self._v_attrnamesuser.remove(name)

        # Delete the attribute from the local dictionary
        # closes (#1049285)
        del self.__dict__[name]

    def __delattr__(self, name):
        """Delete a PyTables attribute.

        Deletes the specified existing PyTables attribute from the
        attribute set.  If a nonexistent or system attribute is
        specified, an ``AttributeError`` is raised.

        """

        nodefile = self._v__nodefile

        # Check if attribute exists
        if name not in self._v_attrnames:
            raise AttributeError(
                "Attribute ('%s') does not exist in node '%s'" %
                (name, self._v__nodepath))

        nodefile._check_writable()

        # Remove the PyTables attribute or move it to shadow.
        if nodefile.is_undo_enabled():
            self._g_del_and_log(name)
        else:
            self._g__delattr(name)

    def __getitem__(self, name):
        """The dictionary like interface for __getattr__()."""

        try:
            return self.__getattr__(name)
        except AttributeError:
            # Capture the AttributeError and re-raise it as a KeyError
            raise KeyError("Attribute ('%s') does not exist in node '%s'" %
                           (name, self._v__nodepath))

    def __setitem__(self, name, value):
        """The dictionary like interface for __setattr__()."""

        self.__setattr__(name, value)

    def __delitem__(self, name):
        """The dictionary like interface for __delattr__()."""

        try:
            self.__delattr__(name)
        except AttributeError:
            # Capture the AttributeError and re-raise it as a KeyError
            raise KeyError("Attribute ('%s') does not exist in node '%s'" %
                           (name, self._v__nodepath))

    def __contains__(self, name):
        """Is there an attribute with that name?

        A true value is returned if the attribute set has an attribute
        with the given name, false otherwise.

        """

        return name in self._v_attrnames

    def _f_rename(self, oldattrname, newattrname):
        """Rename an attribute from oldattrname to newattrname."""

        if oldattrname == newattrname:
            # Do nothing
            return

        # First, fetch the value of the oldattrname
        attrvalue = getattr(self, oldattrname)

        # Now, create the new attribute
        setattr(self, newattrname, attrvalue)

        # Finally, remove the old attribute
        delattr(self, oldattrname)

    def _g_copy(self, newset, set_attr=None, copyclass=False):
        """Copy set attributes.

        Copies all user and allowed system PyTables attributes to the
        given attribute set, replacing the existing ones.

        You can specify a *bound* method of the destination set that
        will be used to set its attributes.  Otherwise, its `_g__setattr`
        method will be used.

        Changes are logged depending on the chosen setting method.  The
        default setting method does not log anything.

        .. versionchanged:: 3.0
           The *newSet* parameter has been renamed to *newset*.

        .. versionchanged:: 3.0
           The *copyClass* parameter has been renamed to *copyclass*.

        """

        copysysattrs = newset._v__nodefile.params['PYTABLES_SYS_ATTRS']
        if set_attr is None:
            set_attr = newset._g__setattr

        for attrname in self._v_attrnamesuser:
            # Do not copy the unimplemented attributes.
            if attrname not in self._v_unimplemented:
                set_attr(attrname, getattr(self, attrname))
        # Copy the system attributes that we are allowed to.
        if copysysattrs:
            for attrname in self._v_attrnamessys:
                if ((attrname not in SYS_ATTRS_NOTTOBECOPIED) and
                        # Do not copy the FIELD_ attributes in tables as
                        # this can be really *slow* (the exact reason is
                        # not known).
                        # See #304.
                        not attrname.startswith("FIELD_")):
                    set_attr(attrname, getattr(self, attrname))
            # Copy CLASS and VERSION attributes if requested
            if copyclass:
                for attrname in FORCE_COPY_CLASS:
                    if attrname in self._v_attrnamessys:
                        set_attr(attrname, getattr(self, attrname))

    def _f_copy(self, where):
        """Copy attributes to the where node.

        Copies all user and certain system attributes to the given where
        node (a Node instance - see :ref:`NodeClassDescr`), replacing
        the existing ones.

        """

        # AttributeSet must be defined in order to define a Node.
        # However, we need to know Node here.
        # Using class_name_dict avoids a circular import.
        if not isinstance(where, class_name_dict['Node']):
            raise TypeError("destination object is not a node: %r" % (where, ))
        self._g_copy(where._v_attrs, where._v_attrs.__setattr__)

    def _g_close(self):
        # Nothing will be done here, as the existing instance is completely
        # operative now.
        pass

    def __str__(self):
        """The string representation for this object."""

        # The pathname
        pathname = self._v__nodepath
        # Get this class name
        classname = self.__class__.__name__
        # The attribute names
        attrnumber = len(self._v_attrnames)
        return "%s._v_attrs (%s), %s attributes" % \
               (pathname, classname, attrnumber)

    def __repr__(self):
        """A detailed string representation for this object."""

        # print additional info only if there are attributes to show
        attrnames = list(self._v_attrnames)
        if len(attrnames):
            rep = [
                '%s := %r' % (attr, getattr(self, attr)) for attr in attrnames
            ]
            attrlist = '[%s]' % (',\n    '.join(rep))

            return "%s:\n   %s" % (str(self), attrlist)
        else:
            return str(self)
Example #9
class ProxyDict(dict):
    """A dictionary which uses a container object to store its values."""

    containerRef = previous_api_property('containerref')

    def __init__(self, container):
        self.containerref = weakref.ref(container)
        """A weak reference to the container object.

        .. versionchanged:: 3.0
           The *containerRef* attribute has been renamed into
           *containerref*.

        """

    def __getitem__(self, key):
        if key not in self:
            raise KeyError(key)

        # Values are not actually stored to avoid extra references.
        return self._get_value_from_container(self._get_container(), key)

    def __setitem__(self, key, value):
        # Values are not actually stored to avoid extra references.
        super(ProxyDict, self).__setitem__(key, None)

    def __repr__(self):
        return object.__repr__(self)

    def __str__(self):
        # C implementation does not use `self.__getitem__()`. :(
        itemFormat = '%r: %r'
        itemReprs = [itemFormat % item for item in self.iteritems()]
        return '{%s}' % ', '.join(itemReprs)

    def values(self):
        # C implementation does not use `self.__getitem__()`. :(
        valueList = []
        for key in self.iterkeys():
            valueList.append(self[key])
        return valueList

    def itervalues(self):
        # C implementation does not use `self.__getitem__()`. :(
        for key in self.iterkeys():
            yield self[key]

    def items(self):
        # C implementation does not use `self.__getitem__()`. :(
        itemList = []
        for key in self.iterkeys():
            itemList.append((key, self[key]))
        return itemList

    def iteritems(self):
        # C implementation does not use `self.__getitem__()`. :(
        for key in self.iterkeys():
            yield (key, self[key])

    def _get_container(self):
        container = self.containerref()
        if container is None:
            raise ValueError("the container object does no longer exist")
        return container

    _getContainer = previous_api(_get_container)
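# A hedged subclass sketch (names are illustrative): concrete proxies
# override ``_get_value_from_container()``; values are recomputed from the
# weakly referenced container on every lookup, so the dict stores keys only.
class _DemoProxy(ProxyDict):
    def _get_value_from_container(self, container, key):
        # Derive the value on demand instead of keeping a reference to it.
        return getattr(container, key)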
Example #10
    if isinstance(leaf, Table) and options.colinfo:
        # Show info of columns
        for colname in leaf.colnames:
            print(repr(leaf.cols._f_col(colname)))

    if isinstance(leaf, Table) and options.idxinfo:
        # Show info of indexes
        for colname in leaf.colnames:
            col = leaf.cols._f_col(colname)
            if isinstance(col, Column) and col.index is not None:
                idx = col.index
                print(repr(idx))


dumpLeaf = previous_api(dump_leaf)


def dump_group(pgroup):
    node_kinds = pgroup._v_file._node_kinds[1:]
    for group in pgroup._f_walk_groups():
        print(str(group))
        if options.showattrs:
            print("  " + repr(group._v_attrs))
        for kind in node_kinds:
            for node in group._f_list_nodes(kind):
                if options.verbose or options.dump:
                    dump_leaf(node)
                else:
                    print(str(node))
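# A hedged usage sketch: these helpers are driven by the ptdump script,
# which fills in the module-level ``options`` namespace from its command
# line; given an open file they amount to:
#
#     import tables
#     with tables.open_file('demo.h5', 'r') as f:
#         dump_group(f.root)   # walk the groups, printing each node found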
Example #12
class IndexArray(NotLoggedMixin, EArray, indexesextension.IndexArray):
    """Represent the index (sorted or reverse index) dataset in HDF5 file.

    All NumPy typecodes are supported except for complex datatypes.

    Parameters
    ----------
    parentnode
        The Index instance this object will hang from.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An Atom object representing the shape and type of the atomic objects to
        be saved. Only scalar atoms are supported.
    title
        Sets a TITLE attribute on the array entity.
    filters : Filters
        An instance of the Filters class that provides information about the
        desired I/O filters to be applied during the life of this object.
    byteorder
        The byteorder of the data on-disk.

    """

    # Class identifier.
    _c_classid = 'INDEXARRAY'

    _c_classId = previous_api_property('_c_classid')

    # Properties
    # ~~~~~~~~~~
    chunksize = property(lambda self: self.chunkshape[1], None, None,
                         """The chunksize for this object.""")

    slicesize = property(lambda self: self.shape[1], None, None,
                         """The slicesize for this object.""")

    # Other methods
    # ~~~~~~~~~~~~~
    def __init__(self,
                 parentnode,
                 name,
                 atom=None,
                 title="",
                 filters=None,
                 byteorder=None):
        """Create an IndexArray instance."""

        self._v_pathname = parentnode._g_join(name)
        if atom is not None:
            # The shape and chunkshape need to be fixed here
            if name == "sorted":
                reduction = parentnode.reduction
                shape = (0, parentnode.slicesize // reduction)
                chunkshape = (1, parentnode.chunksize // reduction)
            else:
                shape = (0, parentnode.slicesize)
                chunkshape = (1, parentnode.chunksize)
        else:
            # The shape and chunkshape will be read from disk later on
            shape = None
            chunkshape = None

        super(IndexArray, self).__init__(parentnode,
                                         name,
                                         atom,
                                         shape,
                                         title,
                                         filters,
                                         chunkshape=chunkshape,
                                         byteorder=byteorder)

    # This version of searchBin uses both ranges (1st level) and
    # bounds (2nd level) caches. It uses a cache for boundary rows,
    # but not for 'sorted' rows (this is only supported for the
    # 'optimized' types).
    def _search_bin(self, nrow, item):
        item1, item2 = item
        result1 = -1
        result2 = -1
        hi = self.shape[1]
        ranges = self._v_parent.rvcache
        boundscache = self.boundscache
        # First, look at the beginning of the slice
        begin = ranges[nrow, 0]
        # Look for items at the beginning of sorted slices
        if item1 <= begin:
            result1 = 0
        if item2 < begin:
            result2 = 0
        if result1 >= 0 and result2 >= 0:
            return (result1, result2)
        # Then, look for items at the end of the sorted slice
        end = ranges[nrow, 1]
        if result1 < 0:
            if item1 > end:
                result1 = hi
        if result2 < 0:
            if item2 >= end:
                result2 = hi
        if result1 >= 0 and result2 >= 0:
            return (result1, result2)
        # Finally, do a lookup for item1 and item2 if they were not found
        # Lookup in the middle of slice for item1
        chunksize = self.chunksize  # Number of elements/chunksize
        nchunk = -1
        # Try to get the bounds row from the LRU cache
        nslot = boundscache.getslot(nrow)
        if nslot >= 0:
            # Cache hit. Use the row kept there.
            bounds = boundscache.getitem(nslot)
        else:
            # No luck with cached data. Read the row and put it in the cache.
            bounds = self._v_parent.bounds[nrow]
            size = bounds.size * bounds.itemsize
            boundscache.setitem(nrow, bounds, size)
        if result1 < 0:
            # Search the appropriate chunk in bounds cache
            nchunk = bisect_left(bounds, item1)
            chunk = self._read_sorted_slice(nrow, chunksize * nchunk,
                                            chunksize * (nchunk + 1))
            result1 = self._bisect_left(chunk, item1, chunksize)
            result1 += chunksize * nchunk
        # Lookup in the middle of slice for item2
        if result2 < 0:
            # Search the appropriate chunk in bounds cache
            nchunk2 = bisect_right(bounds, item2)
            if nchunk2 != nchunk:
                chunk = self._read_sorted_slice(nrow, chunksize * nchunk2,
                                                chunksize * (nchunk2 + 1))
            result2 = self._bisect_right(chunk, item2, chunksize)
            result2 += chunksize * nchunk2
        return (result1, result2)

    _searchBin = previous_api(_search_bin)
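    # The fast paths above, with illustrative numbers: if the ranges cache
    # says a slice spans [begin, end] = [10, 90], then item1 = 5 resolves
    # to 0 and item2 = 95 resolves to ``hi`` immediately; only items that
    # fall inside [10, 90] pay for the bounds cache and chunked bisection.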

    def __str__(self):
        "A compact representation of this class"
        return "IndexArray(path=%s)" % self._v_pathname

    def __repr__(self):
        """A verbose representation of this class."""

        return """%s
  atom = %r
  shape = %s
  nrows = %s
  chunksize = %s
  slicesize = %s
  byteorder = %r""" % (self, self.atom, self.shape, self.nrows, self.chunksize,
                       self.slicesize, self.byteorder)
Example #13
class Array(hdf5extension.Array, Leaf):
    """This class represents homogeneous datasets in an HDF5 file.

    This class provides methods to write or read data to or from array objects
    in the file. This class does not allow you to enlarge or compress the
    datasets on disk; use the EArray class (see :ref:`EArrayClassDescr`) if
    you want enlargeable dataset support or compression features, or CArray
    (see :ref:`CArrayClassDescr`) if you just want compression.

    An interesting property of the Array class is that it remembers the
    *flavor* of the object that has been saved so that if you saved, for
    example, a list, you will get a list during readings afterwards; if you
    saved a NumPy array, you will get a NumPy object, and so forth.

    Note that this class inherits all the public attributes and methods that
    Leaf (see :ref:`LeafClassDescr`) already provides. However, as Array
    instances have no internal I/O buffers, it is not necessary to use the
    flush() method they inherit from Leaf in order to save their internal state
    to disk.  When a writing method call returns, all the data is already on
    disk.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*

    name : str
        The name of this node in its parent group.
    obj
        The array or scalar to be saved.  Accepted types are NumPy
        arrays and scalars as well as native Python sequences and
        scalars, provided that values are regular (i.e. they are not
        like ``[[1,2],2]``) and homogeneous (i.e. all the elements are
        of the same type).

        .. versionchanged:: 3.0
           Renamed from *object* to *obj*.
    title
        A description for this node (it sets the ``TITLE`` HDF5 attribute on
        disk).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or 'big'.
        If this is not specified, the byteorder is that of the given *obj*.

    """

    # Class identifier.
    _c_classid = 'ARRAY'

    _c_classId = previous_api_property('_c_classid')
    _v_objectId = previous_api_property('_v_objectid')

    # Lazy read-only attributes
    # `````````````````````````
    @lazyattr
    def dtype(self):
        """The NumPy ``dtype`` that most closely matches this array."""

        return self.atom.dtype

    # Properties
    # ~~~~~~~~~~
    def _getnrows(self):
        if self.shape == ():
            return SizeType(1)  # scalar case
        else:
            return self.shape[self.maindim]

    nrows = property(_getnrows, None, None, "The number of rows in the array.")

    def _getrowsize(self):
        maindim = self.maindim
        rowsize = self.atom.size
        for i, dim in enumerate(self.shape):
            if i != maindim:
                rowsize *= dim
        return rowsize

    rowsize = property(
        _getrowsize, None, None,
        "The size of the rows in bytes in dimensions orthogonal to *maindim*.")

    size_in_memory = property(
        lambda self: self.nrows * self.rowsize, None, None,
        """The size of this array's data in bytes when it is fully loaded into
        memory.""")

    # Other methods
    # ~~~~~~~~~~~~~
    def __init__(self,
                 parentnode,
                 name,
                 obj=None,
                 title="",
                 byteorder=None,
                 _log=True,
                 _atom=None):

        self._v_version = None
        """The object version of this array."""
        self._v_new = new = obj is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._obj = obj
        """The object to be stored in the array.  It can be any of numpy,
        list, tuple, string, integer or floating point types, provided
        that they are regular (i.e. they are not like ``[[1, 2], 2]``).

        .. versionchanged:: 3.0
           Renamed from *_object* to *_obj*.

        """

        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer. """
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        # Documented (*public*) attributes.
        self.atom = _atom
        """An Atom (see :ref:`AtomClassDescr`) instance representing the *type*
        and *shape* of the atomic objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.nrow = None
        """On iterators, this is the index of the current row."""
        self.extdim = -1  # ordinary arrays are not enlargeable
        """The index of the enlargeable dimension."""

        # Ordinary arrays have no filters: leaf is created with default ones.
        super(Array, self).__init__(parentnode, name, new, Filters(),
                                    byteorder, _log)

    def _g_create(self):
        """Save a new array in file."""

        self._v_version = obversion
        try:
            # `Leaf._g_post_init_hook()` should be setting the flavor on disk.
            self._flavor = flavor = flavor_of(self._obj)
            nparr = array_as_internal(self._obj, flavor)
        except:  # XXX
            # Problems converting data. Close the node and re-raise exception.
            self.close(flush=0)
            raise

        # Raise an error in case of unsupported object
        if nparr.dtype.kind in ['V', 'U', 'O']:  # in void, unicode, object
            raise TypeError("Array objects cannot currently deal with void, "
                            "unicode or object arrays")

        # Decrease the number of references to the object
        self._obj = None

        # Fix the byteorder of data
        nparr = self._g_fix_byteorder_data(nparr, nparr.dtype.byteorder)

        # Create the array on-disk
        try:
            # ``self._v_objectid`` needs to be set because it will be
            # needed for setting attributes in some descendants later on
            (self._v_objectid, self.shape,
             self.atom) = self._create_array(nparr, self._v_new_title,
                                             self.atom)
        except:  # XXX
            # Problems creating the Array on disk. Close node and re-raise.
            self.close(flush=0)
            raise

        # Compute the optimal buffer size
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Arrays don't have chunkshapes (so, set it to None)
        self._v_chunkshape = None

        return self._v_objectid

    def _g_open(self):
        """Get the metadata info for an array in file."""

        (oid, self.atom, self.shape, self._v_chunkshape) = self._open_array()

        self.nrowsinbuf = self._calc_nrowsinbuf()

        return oid

    def get_enum(self):
        """Get the enumerated type associated with this array.

        If this array is of an enumerated type, the corresponding Enum instance
        (see :ref:`EnumClassDescr`) is returned. If it is not of an enumerated
        type, a TypeError is raised.

        """

        if self.atom.kind != 'enum':
            raise TypeError("array ``%s`` is not of an enumerated type" %
                            self._v_pathname)

        return self.atom.enum

    getEnum = previous_api(get_enum)

    def iterrows(self, start=None, stop=None, step=None):
        """Iterate over the rows of the array.

        This method returns an iterator yielding an object of the current
        flavor for each selected row in the array.  The returned rows are taken
        from the *main dimension*.

        If a range is not supplied, *all the rows* in the array are iterated
        upon - you can also use the :meth:`Array.__iter__` special method for
        that purpose.  If you only want to iterate over a given *range of rows*
        in the array, you may use the start, stop and step parameters.

        Examples
        --------

        ::

            result = [row for row in arrayInstance.iterrows(step=4)]

        .. versionchanged:: 3.0
           If the *start* parameter is provided and *stop* is None then the
           array is iterated from *start* to the last row.
           In PyTables < 3.0 only one element was returned.

        """

        try:
            (self._start, self._stop,
             self._step) = self._process_range(start, stop, step)
        except IndexError:
            # If problems with indexes, silently return the null tuple
            return ()
        self._init_loop()
        return self

    def __iter__(self):
        """Iterate over the rows of the array.

        This is equivalent to calling :meth:`Array.iterrows` with default
        arguments, i.e. it iterates over *all the rows* in the array.

        Examples
        --------

        ::

            result = [row[2] for row in array]

        Which is equivalent to::

            result = [row[2] for row in array.iterrows()]

        """

        if not self._init:
            # If the iterator is called directly, assign default variables
            self._start = 0
            self._stop = self.nrows
            self._step = 1
            # and initialize the loop
            self._init_loop()
        return self

    def _init_loop(self):
        """Initialization for the __iter__ iterator."""

        self._nrowsread = self._start
        self._startb = self._start
        self._row = -1  # Sentinel
        self._init = True  # Sentinel
        self.nrow = SizeType(self._start - self._step)  # row number

    _initLoop = previous_api(_init_loop)

    def next(self):
        """Get the next element of the array during an iteration.

        The element is returned as an object of the current flavor.

        """

        # this could probably be sped up for long iterations by reusing the
        # listarr buffer
        if self._nrowsread >= self._stop:
            self._init = False
            self.listarr = None  # fixes issue #308
            raise StopIteration  # end of iteration
        else:
            # Read a chunk of rows
            if self._row + 1 >= self.nrowsinbuf or self._row < 0:
                self._stopb = self._startb + self._step * self.nrowsinbuf
                # Protection for reading more elements than needed
                if self._stopb > self._stop:
                    self._stopb = self._stop
                listarr = self._read(self._startb, self._stopb, self._step)
                # Swap the axes to ease the return of elements
                if self.extdim > 0:
                    listarr = listarr.swapaxes(self.extdim, 0)
                self.listarr = internal_to_flavor(listarr, self.flavor)
                self._row = -1
                self._startb = self._stopb
            self._row += 1
            self.nrow += self._step
            self._nrowsread += self._step
            # Fixes bug #968132
            # if self.listarr.shape:
            if self.shape:
                return self.listarr[self._row]
            else:
                return self.listarr  # Scalar case

    def _interpret_indexing(self, keys):
        """Internal routine used by __getitem__ and __setitem__"""

        maxlen = len(self.shape)
        shape = (maxlen, )
        startl = numpy.empty(shape=shape, dtype=SizeType)
        stopl = numpy.empty(shape=shape, dtype=SizeType)
        stepl = numpy.empty(shape=shape, dtype=SizeType)
        stop_None = numpy.zeros(shape=shape, dtype=SizeType)
        if not isinstance(keys, tuple):
            keys = (keys, )
        nkeys = len(keys)
        dim = 0
        # There are some problems when dealing with ``[..., ...]`` keys,
        # but this is a rather weird way to pass parameters anyway
        for key in keys:
            ellipsis = 0  # Sentinel
            if isinstance(key, type(Ellipsis)):
                ellipsis = 1
                for diml in xrange(dim, len(self.shape) - (nkeys - dim) + 1):
                    startl[dim] = 0
                    stopl[dim] = self.shape[diml]
                    stepl[dim] = 1
                    dim += 1
            elif dim >= maxlen:
                raise IndexError("Too many indices for object '%s'" %
                                 self._v_pathname)
            elif is_idx(key):
                key = operator.index(key)

                # Protection for index out of range
                if key >= self.shape[dim]:
                    raise IndexError("Index out of range")
                if key < 0:
                    # To support negative values (Fixes bug #968149)
                    key += self.shape[dim]
                start, stop, step = self._process_range(key,
                                                        key + 1,
                                                        1,
                                                        dim=dim)
                stop_None[dim] = 1
            elif isinstance(key, slice):
                start, stop, step = self._process_range(key.start,
                                                        key.stop,
                                                        key.step,
                                                        dim=dim)
            else:
                raise TypeError("Non-valid index or slice: %s" % key)
            if not ellipsis:
                startl[dim] = start
                stopl[dim] = stop
                stepl[dim] = step
                dim += 1

        # Complete the other dimensions, if needed
        if dim < len(self.shape):
            for diml in xrange(dim, len(self.shape)):
                startl[dim] = 0
                stopl[dim] = self.shape[diml]
                stepl[dim] = 1
                dim += 1

        # Compute the shape for the container properly. Fixes #1288792
        shape = []
        for dim in xrange(len(self.shape)):
            # The negative division operates differently with python scalars
            # and numpy scalars (which are similar to C conventions). See:
            # http://www.python.org/doc/faq/programming.html#why-does-22-10-return-3
            # and
            # http://www.peterbe.com/Integer-division-in-programming-languages
            # for more info on this issue.
            # I've finally decided to rely on the len(xrange) function.
            # F. Alted 2006-09-25
            # Switch to `lrange` to allow long ranges (see #99).
            # use xrange, since it supports large integers as of Python 2.6
            # see github #181
            new_dim = len(xrange(startl[dim], stopl[dim], stepl[dim]))
            if not (new_dim == 1 and stop_None[dim]):
                shape.append(new_dim)

        return startl, stopl, stepl, shape
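    # A worked example (illustrative shape): for an array of shape
    # (10, 20), the key tuple ``(3, slice(5, 15, 2))`` is interpreted as
    # startl = [3, 5], stopl = [4, 15], stepl = [1, 2]; stop_None marks
    # axis 0, so its singleton dimension is dropped and shape == [5].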

    def _fancy_selection(self, args):
        """Performs a NumPy-style fancy selection in `self`.

        Implements advanced NumPy-style selection operations in
        addition to the standard slice-and-int behavior.

        Indexing arguments may be ints, slices or lists of indices.

        Note: This is a backport from the h5py project.

        """

        # Internal functions

        def validate_number(num, length):
            """Validate a list member for the given axis length."""

            try:
                num = long(num)
            except TypeError:
                raise TypeError("Illegal index: %r" % num)
            if num > length - 1:
                raise IndexError("Index out of bounds: %d" % num)

        def expand_ellipsis(args, rank):
            """Expand ellipsis objects and fill in missing axes."""

            n_el = sum(1 for arg in args if arg is Ellipsis)
            if n_el > 1:
                raise IndexError("Only one ellipsis may be used.")
            elif n_el == 0 and len(args) != rank:
                args = args + (Ellipsis, )

            final_args = []
            n_args = len(args)
            for idx, arg in enumerate(args):
                if arg is Ellipsis:
                    final_args.extend((slice(None), ) * (rank - n_args + 1))
                else:
                    final_args.append(arg)

            if len(final_args) > rank:
                raise IndexError("Too many indices.")

            return final_args

        def translate_slice(exp, length):
            """Given a slice object, return a 3-tuple (start, count, step)

            This is for use with the hyperslab selection routines.

            """

            start, stop, step = exp.start, exp.stop, exp.step
            if start is None:
                start = 0
            else:
                start = long(start)
            if stop is None:
                stop = length
            else:
                stop = long(stop)
            if step is None:
                step = 1
            else:
                step = long(step)

            if step < 1:
                raise IndexError("Step must be >= 1 (got %d)" % step)
            if stop == start:
                raise IndexError("Zero-length selections are not allowed")
            if stop < start:
                raise IndexError("Reverse-order selections are not allowed")
            if start < 0:
                start = length + start
            if stop < 0:
                stop = length + stop

            if not 0 <= start <= (length - 1):
                raise IndexError("Start index %s out of range (0-%d)" %
                                 (start, length - 1))
            if not 1 <= stop <= length:
                raise IndexError("Stop index %s out of range (1-%d)" %
                                 (stop, length))

            count = (stop - start) // step
            if (stop - start) % step != 0:
                count += 1

            if start + count > length:
                raise IndexError("Selection out of bounds (%d; axis has %d)" %
                                 (start + count, length))

            return start, count, step

        # Main code for _fancy_selection
        mshape = []
        selection = []

        if not isinstance(args, tuple):
            args = (args, )

        args = expand_ellipsis(args, len(self.shape))

        list_seen = False
        reorder = None
        for idx, (exp, length) in enumerate(zip(args, self.shape)):
            if isinstance(exp, slice):
                start, count, step = translate_slice(exp, length)
                selection.append((start, count, step, idx, "AND"))
                mshape.append(count)
            else:
                try:
                    exp = list(exp)
                except TypeError:
                    exp = [exp]  # Handle scalar index as a list of length 1
                    mshape.append(0)  # Keep track of scalar index for NumPy
                else:
                    mshape.append(len(exp))
                if len(exp) == 0:
                    raise IndexError(
                        "Empty selections are not allowed (axis %d)" % idx)
                elif len(exp) > 1:
                    if list_seen:
                        raise IndexError("Only one selection list is allowed")
                    else:
                        list_seen = True
                else:
                    if not (isinstance(exp[0], (int, long, numpy.integer)) or
                            (isinstance(exp[0], numpy.ndarray) and
                             numpy.issubdtype(exp[0].dtype, numpy.integer))):
                        raise TypeError("Only integer coordinates allowed.")

                nexp = numpy.asarray(exp, dtype="i8")
                # Convert negative values
                nexp = numpy.where(nexp < 0, length + nexp, nexp)
                # Check whether the list is ordered or not
                # (only one unordered list is allowed)
                if len(nexp) != len(numpy.unique(nexp)):
                    raise IndexError(
                        "Selection lists cannot have repeated values")
                neworder = nexp.argsort()
                if (neworder.shape != (len(exp), ) or numpy.sum(
                        numpy.abs(neworder - numpy.arange(len(exp)))) != 0):
                    if reorder is not None:
                        raise IndexError(
                            "Only one selection list can be unordered")
                    corrected_idx = sum(1 for x in mshape if x != 0) - 1
                    reorder = (corrected_idx, neworder)
                    nexp = nexp[neworder]
                for select_idx in xrange(len(nexp) + 1):
                    # This crazy piece of code performs a list selection
                    # using HDF5 hyperslabs.
                    # For each index, perform a "NOTB" selection on every
                    # portion of *this axis* which falls *outside* the list
                    # selection.  For this to work, the input array MUST be
                    # monotonically increasing.
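                    # Illustration (hypothetical values): for the list [2, 5]
                    # on an axis of length 8, the complementary "NOTB"
                    # hyperslabs are rows 0-1, 3-4 and 6-7.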
                    if select_idx < len(nexp):
                        validate_number(nexp[select_idx], length)
                    if select_idx == 0:
                        start = 0
                        count = nexp[0]
                    elif select_idx == len(nexp):
                        start = nexp[-1] + 1
                        count = length - start
                    else:
                        start = nexp[select_idx - 1] + 1
                        count = nexp[select_idx] - start
                    if count > 0:
                        selection.append((start, count, 1, idx, "NOTB"))

        mshape = tuple(x for x in mshape if x != 0)
        return selection, reorder, mshape

    _fancySelection = previous_api(_fancy_selection)

    def __getitem__(self, key):
        """Get a row, a range of rows or a slice from the array.

        The set of tokens allowed for the key is the same as that for extended
        slicing in Python (including the Ellipsis or ... token).  The result is
        an object of the current flavor; its shape depends on the kind of slice
        used as key and the shape of the array itself.

        Furthermore, NumPy-style fancy indexing, where a list of indices in a
        certain axis is specified, is also supported.  Note that only one list
        per selection is supported right now.  Finally, NumPy-style point and
        boolean selections are supported as well.

        Examples
        --------

        ::

            array1 = array[4]                       # simple selection
            array2 = array[4:1000:2]                # slice selection
            array3 = array[1, ..., ::2, 1:4, 4:]    # general slice selection
            array4 = array[1, [1,5,10], ..., -1]    # fancy selection
            array5 = array[np.where(array[:] > 4)]  # point selection
            array6 = array[array[:] > 4]            # boolean selection

        """

        self._g_check_open()

        try:
            # First, try with a regular selection
            startl, stopl, stepl, shape = self._interpret_indexing(key)
            arr = self._read_slice(startl, stopl, stepl, shape)
        except TypeError:
            # Then, try with a point-wise selection
            try:
                coords = self._point_selection(key)
                arr = self._read_coords(coords)
            except TypeError:
                # Finally, try with a fancy selection
                selection, reorder, shape = self._fancy_selection(key)
                arr = self._read_selection(selection, reorder, shape)

        if self.flavor == "numpy" or not self._v_convert:
            return arr

        return internal_to_flavor(arr, self.flavor)

    def __setitem__(self, key, value):
        """Set a row, a range of rows or a slice in the array.

        It takes different actions depending on the type of the key parameter:
        if it is an integer, the corresponding array row is set to value (the
        value is broadcast when needed).  If key is a slice, the row slice
        determined by it is set to value (as usual, if the slice to be updated
        exceeds the actual shape of the array, only the values in the existing
        range are updated).

        If value is a multidimensional object, then its shape must be
        compatible with the shape determined by key, otherwise, a ValueError
        will be raised.

        Furthermore, NumPy-style fancy indexing, where a list of indices in a
        certain axis is specified, is also supported.  Note that only one list
        per selection is supported right now.  Finally, NumPy-style point and
        boolean selections are supported as well.

        Examples
        --------

        ::

            a1[0] = 333        # assign an integer to an Integer Array row
            a2[0] = 'b'        # assign a string to a string Array row
            a3[1:4] = 5        # broadcast 5 to slice 1:4
            a4[1:4:2] = 'xXx'  # broadcast 'xXx' to slice 1:4:2

            # General slice update (a5.shape = (4,3,2,8,5,10)).
            a5[1, ..., ::2, 1:4, 4:] = numpy.arange(432).reshape((3,2,4,3,6))
            a6[1, [1,5,10], ..., -1] = arr    # fancy selection
            a7[np.where(a6[:] > 4)] = 4       # point selection + broadcast
            a8[arr > 4] = arr2                # boolean selection

        """

        self._g_check_open()

        # Create an array compliant with the specified slice
        nparr = convert_to_np_atom2(value, self.atom)
        if nparr.size == 0:
            return

        # truncate data if least_significant_digit filter is set
        # TODO: add the least_significant_digit attribute to the array on disk
        if (self.filters.least_significant_digit is not None
                and not numpy.issubdtype(nparr.dtype, int)):
            nparr = quantize(nparr, self.filters.least_significant_digit)

        try:
            startl, stopl, stepl, shape = self._interpret_indexing(key)
            self._write_slice(startl, stopl, stepl, shape, nparr)
        except TypeError:
            # Then, try with a point-wise selection
            try:
                coords = self._point_selection(key)
                self._write_coords(coords, nparr)
            except TypeError:
                selection, reorder, shape = self._fancy_selection(key)
                self._write_selection(selection, reorder, shape, nparr)

    def _check_shape(self, nparr, slice_shape):
        """Test that nparr shape is consistent with underlying object.

        If not, try creating a new nparr object, using broadcasting if
        necessary.

        """

        if nparr.shape != (slice_shape + self.atom.dtype.shape):
            # Create an array compliant with the specified shape
            narr = numpy.empty(shape=slice_shape, dtype=self.atom.dtype)

            # Assign the value to it. It will raise a ValueError exception
            # if the objects cannot be broadcast to a single shape.
            narr[...] = nparr
            return narr
        else:
            return nparr

    _checkShape = previous_api(_check_shape)

    def _read_slice(self, startl, stopl, stepl, shape):
        """Read a slice based on `startl`, `stopl` and `stepl`."""

        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._g_read_slice(startl, stopl, stepl, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    _readSlice = previous_api(_read_slice)

    def _read_coords(self, coords):
        """Read a set of points defined by `coords`."""

        nparr = numpy.empty(dtype=self.atom.dtype, shape=len(coords))
        if len(coords) > 0:
            self._g_read_coords(coords, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    _readCoords = previous_api(_read_coords)

    def _read_selection(self, selection, reorder, shape):
        """Read a `selection`.

        Reorder if necessary.

        """

        # Create the container for the slice
        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        # Arrays that have non-zero dimensionality
        self._g_read_selection(selection, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        elif reorder is not None:
            # We need to reorder the array
            idx, neworder = reorder
            k = [slice(None)] * len(shape)
            k[idx] = neworder.argsort()
            # Apparently, a copy is not needed here, but doing it
            # for symmetry with the `_write_selection()` method.
            nparr = nparr[k].copy()
        return nparr

    _readSelection = previous_api(_read_selection)

    def _write_slice(self, startl, stopl, stepl, shape, nparr):
        """Write `nparr` in a slice based on `startl`, `stopl` and `stepl`."""

        nparr = self._check_shape(nparr, tuple(shape))
        countl = ((stopl - startl - 1) // stepl) + 1
        self._g_write_slice(startl, stepl, countl, nparr)

    _writeSlice = previous_api(_write_slice)

    def _write_coords(self, coords, nparr):
        """Write `nparr` values in points defined by `coords` coordinates."""

        if len(coords) > 0:
            nparr = self._check_shape(nparr, (len(coords), ))
            self._g_write_coords(coords, nparr)

    _writeCoords = previous_api(_write_coords)

    def _write_selection(self, selection, reorder, shape, nparr):
        """Write `nparr` in `selection`.

        Reorder if necessary.

        """

        nparr = self._check_shape(nparr, tuple(shape))
        # Check whether we should reorder the array
        if reorder is not None:
            idx, neworder = reorder
            k = [slice(None)] * len(shape)
            k[idx] = neworder
            # For a reason I don't understand well, we need a copy of
            # the reordered array
            nparr = nparr[k].copy()
        self._g_write_selection(selection, nparr)

    _writeSelection = previous_api(_write_selection)

    def _read(self, start, stop, step, out=None):
        """Read the array from disk without slice or flavor processing."""

        nrowstoread = len(xrange(0, stop - start, step))
        shape = list(self.shape)
        if shape:
            shape[self.maindim] = nrowstoread
        if out is None:
            arr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        else:
            bytes_required = self.rowsize * nrowstoread
            # if buffer is too small, it will segfault
            if bytes_required != out.nbytes:
                raise ValueError(
                    ('output array size invalid, got {0} bytes, '
                     'need {1} bytes').format(out.nbytes, bytes_required))
            if not out.flags['C_CONTIGUOUS']:
                raise ValueError('output array not C contiguous')
            arr = out
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._read_array(start, stop, step, arr)
        # data is always read in the system byteorder
        # if the out array's byteorder is different, do a byteswap
        if (out is not None
                and byteorders[arr.dtype.byteorder] != sys.byteorder):
            arr.byteswap(True)
        return arr

    def read(self, start=None, stop=None, step=None, out=None):
        """Get data in the array as an object of the current flavor.

        The start, stop and step parameters can be used to select only a
        *range of rows* in the array.  Their meanings are the same as in
        the built-in range() Python function, except that negative values
        of step are not allowed yet. Moreover, if only start is specified,
        then stop will be set to start + 1. If you specify neither
        start nor stop, then *all the rows* in the array are selected.

        The out parameter may be used to specify a NumPy array to receive
        the output data.  Note that the array must have the same size as
        the data selected with the other parameters.  Note that the array's
        datatype is not checked and no type casting is performed, so if it
        does not match the datatype on disk, the output will not be correct.
        Also, this parameter is only valid when the array's flavor is set
        to 'numpy'.  Otherwise, a TypeError will be raised.

        When data is read from disk in NumPy format, the output will be
        in the current system's byteorder, regardless of how it is stored
        on disk.
        The exception is when an output buffer is supplied, in which case
        the output will be in the byteorder of that output buffer.

        .. versionchanged:: 3.0
           Added the *out* parameter.
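
        Example (a minimal sketch, assuming ``arr`` is a one-dimensional
        Int32 Array with at least 4 rows and flavor 'numpy')::

            buf = numpy.empty((2,), dtype='int32')
            data = arr.read(start=0, stop=4, step=2, out=buf)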

        """

        self._g_check_open()
        if out is not None and self.flavor != 'numpy':
            msg = ("Optional 'out' argument may only be supplied if array "
                   "flavor is 'numpy', currently is {0}").format(self.flavor)
            raise TypeError(msg)
        (start, stop, step) = self._process_range_read(start, stop, step)
        arr = self._read(start, stop, step, out)
        return internal_to_flavor(arr, self.flavor)

    def _g_copy_with_stats(self, group, name, start, stop, step, title,
                           filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf."""

        # Compute the correct indices.
        (start, stop, step) = self._process_range_read(start, stop, step)
        # Get the slice of the array
        # (non-buffered version)
        if self.shape:
            arr = self[start:stop:step]
        else:
            arr = self[()]
        # Build the new Array object.  Use the _atom reserved keyword
        # just in case the array is being copied from a native HDF5
        # with atomic types different from scalars.
        # For details, see #275 of trac.
        object_ = Array(group,
                        name,
                        arr,
                        title=title,
                        _log=_log,
                        _atom=self.atom)
        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size

        return (object_, nbytes)

    _g_copyWithStats = previous_api(_g_copy_with_stats)

    def __repr__(self):
        """This provides more metainfo in addition to standard __str__"""

        return """%s
  atom := %r
  maindim := %r
  flavor := %r
  byteorder := %r
  chunkshape := %r""" % (self, self.atom, self.maindim, self.flavor,
                         self.byteorder, self.chunkshape)
Example #14
0
class Expr(object):
    """A class for evaluating expressions with arbitrary array-like objects.

    Expr is a class for evaluating expressions containing array-like objects.
    With it, you can evaluate expressions (like "3 * a + 4 * b") that
    operate on arbitrarily large arrays while optimizing the resources
    required to perform them (basically main memory and CPU cache memory).
    It is similar to the Numexpr package (see :ref:`[NUMEXPR] <NUMEXPR>`),
    but in addition to NumPy objects, it also accepts disk-based homogeneous
    arrays, like the Array, CArray, EArray and Column PyTables objects.

    All the internal computations are performed via the Numexpr package,
    so all the broadcast and upcasting rules of Numexpr apply here too.
    These rules are very similar to the NumPy ones, but with some exceptions
    due to the particularities of having to deal with potentially very large
    disk-based arrays.  Be sure to read the documentation of the Expr
    constructor and methods as well as that of Numexpr, if you want to fully
    grasp these particularities.

    Parameters
    ----------
    expr : str
        This specifies the expression to be evaluated, such as "2 * a + 3 * b".
    uservars : dict
        This can be used to define the variable names appearing in *expr*.
        This mapping should consist of identifier-like strings pointing to any
        `Array`, `CArray`, `EArray`, `Column` or NumPy ndarray instances (or
        even other objects that will be converted to ndarrays if possible).  When
        `uservars` is not provided or `None`, the current local and global
        namespace is sought instead of `uservars`.  It is also possible to pass
        just some of the variables in expression via the `uservars` mapping,
        and the rest will be retrieved from the current local and global
        namespaces.
    kwargs : dict
        This is meant to pass additional parameters to the Numexpr kernel.
        This is basically the same as the kwargs argument in
        Numexpr.evaluate(), and is mainly meant for advanced use.

    Examples
    --------
    The following shows an example of using Expr.

        >>> a = f.create_array('/', 'a', np.array([1,2,3]))
        >>> b = f.create_array('/', 'b', np.array([3,4,5]))
        >>> c = np.array([4,5,6])
        >>> expr = tb.Expr("2 * a + b * c")   # initialize the expression
        >>> expr.eval()                 # evaluate it
        array([14, 24, 36])
        >>> sum(expr)                   # use as an iterator
        74

    where you can see that you can mix different containers in
    the expression (whenever shapes are consistent).

    You can also work with multidimensional arrays::

        >>> a2 = f.create_array('/', 'a2', np.array([[1,2],[3,4]]))
        >>> b2 = f.create_array('/', 'b2', np.array([[3,4],[5,6]]))
        >>> c2 = np.array([4,5])           # This will be broadcast
        >>> expr = tb.Expr("2 * a2 + b2 - c2")
        >>> expr.eval()
        array([[1, 3],
               [7, 9]])
        >>> sum(expr)
        array([ 8, 12])

    .. rubric:: Expr attributes

    .. attribute:: append_mode

        The append mode for user-provided output containers.

    .. attribute:: maindim

        Common main dimension for inputs in expression.

    .. attribute:: names

        The names of variables in expression (list).

    .. attribute:: out

        The user-provided container (if any) for the expression outcome.

    .. attribute:: o_start

        The start range selection for the user-provided output.

    .. attribute:: o_stop

        The stop range selection for the user-provided output.

    .. attribute:: o_step

        The step range selection for the user-provided output.

    .. attribute:: shape

        Common shape for the arrays in expression.

    .. attribute:: values

        The values of variables in expression (list).

    """

    _exprvars_cache = {}
    """Cache of variables participating in expressions.

    .. versionadded:: 3.0

    """
    def __init__(self, expr, uservars=None, **kwargs):

        self.append_mode = False
        """The append mode for user-provided output containers."""
        self.maindim = 0
        """Common main dimension for inputs in expression."""
        self.names = []
        """The names of variables in expression (list)."""
        self.out = None
        """The user-provided container (if any) for the expression outcome."""
        self.o_start = None
        """The start range selection for the user-provided output."""
        self.o_stop = None
        """The stop range selection for the user-provided output."""
        self.o_step = None
        """The step range selection for the user-provided output."""
        self.shape = None
        """Common shape for the arrays in expression."""
        self.start = None
        """The start range selection for the input."""
        self.stop = None
        """The stop range selection for the input."""
        self.step = None
        """The step range selection for the input."""
        self.values = []
        """The values of variables in expression (list)."""

        self._compiled_expr = None
        """The compiled expression."""
        self._single_row_out = None
        """A sample of the output with just a single row."""

        # First, get the signature for the arrays in expression
        vars_ = self._required_expr_vars(expr, uservars)
        context = getContext(kwargs)
        self.names, _ = getExprNames(expr, context)

        # Raise a ValueError in case we have unsupported objects
        for name, var in vars_.iteritems():
            if type(var) in (int, long, float, str):
                continue
            if not isinstance(var, (tb.Leaf, tb.Column)):
                if hasattr(var, "dtype"):
                    # Quacks like a NumPy object
                    continue
                raise TypeError("Unsupported variable type: %r" % var)
            objname = var.__class__.__name__
            if objname not in ("Array", "CArray", "EArray", "Column"):
                raise TypeError("Unsupported variable type: %r" % var)

        # NumPy arrays to be copied? (we don't need to worry about
        # PyTables objects, as the reads always return contiguous and
        # aligned objects, or at least I think so).
        for name, var in vars_.iteritems():
            if isinstance(var, np.ndarray):
                # See numexpr.necompiler.evaluate for a rationale
                # for the code below
                if not var.flags.aligned:
                    if var.ndim != 1:
                        # Do a copy of this variable
                        var = var.copy()
                        # Update the vars_ dictionary
                        vars_[name] = var

        # Get the variables and types
        values = self.values
        types_ = []
        for name in self.names:
            value = vars_[name]
            if hasattr(value, 'atom'):
                types_.append(value.atom)
            elif hasattr(value, 'dtype'):
                types_.append(value)
            else:
                # try to convert into a NumPy array
                value = np.array(value)
                types_.append(value)
            values.append(value)

        # Create a signature for the expression
        signature = [(name, getType(type_))
                     for (name, type_) in zip(self.names, types_)]

        # Compile the expression
        self._compiled_expr = NumExpr(expr, signature, **kwargs)

        # Guess the shape for the outcome and the maindim of inputs
        self.shape, self.maindim = self._guess_shape()

    # The next method is similar to their counterpart in `Table`, but
    # adapted to the `Expr` own requirements.
    def _required_expr_vars(self, expression, uservars, depth=2):
        """Get the variables required by the `expression`.

        A new dictionary defining the variables used in the `expression`
        is returned.  Required variables are first looked up in the
        `uservars` mapping, then in the set of top-level columns of the
        table.  Unknown variables cause a `NameError` to be raised.

        When `uservars` is `None`, the local and global namespace where
        the API callable which uses this method is called is sought
        instead.  To disable this mechanism, just specify a mapping as
        `uservars`.

        Nested columns and variables with a ``uint64`` type are not
        allowed (`TypeError` and `NotImplementedError` are raised,
        respectively).

        `depth` specifies the depth of the frame in order to reach local
        or global variables.

        """

        # Get the names of variables used in the expression.
        exprvars_cache = self._exprvars_cache
        if expression not in exprvars_cache:
            # Protection against growing the cache too much
            if len(exprvars_cache) > 256:
                # Remove 10 (arbitrary) elements from the cache
                for k in exprvars_cache.keys()[:10]:
                    del exprvars_cache[k]
            cexpr = compile(expression, '<string>', 'eval')
            exprvars = [
                var for var in cexpr.co_names
                if var not in ['None', 'False', 'True']
                and var not in numexpr_functions
            ]
            exprvars_cache[expression] = exprvars
        else:
            exprvars = exprvars_cache[expression]

        # Get the local and global variable mappings of the user frame
        # if no mapping has been explicitly given for user variables.
        user_locals, user_globals = {}, {}
        if uservars is None:
            user_frame = sys._getframe(depth)
            user_locals = user_frame.f_locals
            user_globals = user_frame.f_globals

        # Look for the required variables first among the ones
        # explicitly provided by the user.
        reqvars = {}
        for var in exprvars:
            # Get the value.
            if uservars is not None and var in uservars:
                val = uservars[var]
            elif uservars is None and var in user_locals:
                val = user_locals[var]
            elif uservars is None and var in user_globals:
                val = user_globals[var]
            else:
                raise NameError("name ``%s`` is not defined" % var)

            # Check the value.
            if hasattr(val, 'dtype') and val.dtype.str[1:] == 'u8':
                raise NotImplementedError(
                    "variable ``%s`` refers to "
                    "a 64-bit unsigned integer object, that is "
                    "not yet supported in expressions, sorry; " % var)
            elif hasattr(val, '_v_colpathnames'):  # nested column
                # This branch is never reached because the compile step
                # above already raises a ``TypeError`` for nested
                # columns, but that could change in the future.  So it
                # is best to leave this check here.
                raise TypeError("variable ``%s`` refers to a nested column, "
                                "not allowed in expressions" % var)
            reqvars[var] = val
        return reqvars

    _requiredExprVars = previous_api(_required_expr_vars)

    def set_inputs_range(self, start=None, stop=None, step=None):
        """Define a range for all inputs in expression.

        The computation will only take place for the range defined by
        the start, stop and step parameters in the main dimension of
        inputs (or the leading one, if the object lacks the concept of
        main dimension, like a NumPy container).  If no common main
        dimension exists for all inputs, the leading dimension will be
        used instead.
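
        Example (a minimal sketch; ``expr`` is an existing Expr instance)::

            expr.set_inputs_range(0, 1000)  # compute over rows 0..999 only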

        """

        self.start = start
        self.stop = stop
        self.step = step

    setInputsRange = previous_api(set_inputs_range)

    def set_output(self, out, append_mode=False):
        """Set out as container for output as well as the append_mode.

        The out argument must be a container meant to keep the outcome of
        the expression.  It should be a homogeneous-type container, typically
        an Array, CArray, EArray, Column or a NumPy ndarray.

        The append_mode argument specifies the way in which the output is
        filled.  If true, the rows of the outcome are *appended* to the out
        container.  For this to work, out must have an append() method
        (like an EArray, for example).

        If append_mode is false, the output is set via the __setitem__()
        method (see Expr.set_output_range() for info on how to select
        the rows to be updated).  If out is smaller than what is required
        by the expression, only the computations that are needed to fill
        up the container are carried out.  If it is larger, the excess
        elements are unaffected.
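
        Example (a minimal sketch, assuming ``earr`` is an existing EArray
        with an atom compatible with the expression outcome)::

            expr.set_output(earr, append_mode=True)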

        """

        if not (hasattr(out, "shape") and hasattr(out, "__setitem__")):
            raise ValueError(
                "You need to pass a settable multidimensional container "
                "as output")
        self.out = out
        if append_mode and not hasattr(out, "append"):
            raise ValueError(
                "For activating the ``append`` mode, you need a container "
                "with an `append()` method (like the `EArray`)")
        self.append_mode = append_mode

    setOutput = previous_api(set_output)

    def set_output_range(self, start=None, stop=None, step=None):
        """Define a range for user-provided output object.

        The output object will only be modified in the range specified by the
        start, stop and step parameters in the main dimension of output (or the
        leading one, if the object does not have the concept of main dimension,
        like a NumPy container).
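
        Example (a minimal sketch)::

            expr.set_output_range(10, 100, 2)  # update rows 10, 12, ..., 98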

        """

        if self.out is None:
            raise IndexError(
                "You need to pass an output object to `setOut()` first")
        self.o_start = start
        self.o_stop = stop
        self.o_step = step

    setOutputRange = previous_api(set_output_range)

    # Although the next code is similar to the method in `Leaf`, it
    # allows the use of pure NumPy objects.
    def _calc_nrowsinbuf(self, object_):
        """Calculate the number of rows that will fit in a buffer."""

        # Compute the rowsize for the *leading* dimension
        shape_ = list(object_.shape)
        if shape_:
            shape_[0] = 1

        rowsize = np.prod(shape_) * object_.dtype.itemsize

        # Compute the nrowsinbuf
        # Multiplying the I/O buffer size by 4 gives optimal results
        # in my benchmarks with `tables.Expr` (see ``bench/poly.py``)
        buffersize = IO_BUFFER_SIZE * 4
        nrowsinbuf = buffersize // rowsize
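
        # Illustration (hypothetical numbers): with an IO_BUFFER_SIZE of
        # 1 MiB and a rowsize of 8 KiB, nrowsinbuf is (4 * 1 MiB) // 8 KiB,
        # i.e. 512 rows per I/O operation.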

        # Safeguard against row sizes being extremely large
        if nrowsinbuf == 0:
            nrowsinbuf = 1
            # If rowsize is too large, issue a Performance warning
            maxrowsize = BUFFER_TIMES * buffersize
            if rowsize > maxrowsize:
                warnings.warn(
                    """\
The object ``%s`` is exceeding the maximum recommended rowsize (%d
bytes); be ready to see PyTables asking for *lots* of memory and
possibly slow I/O.  You may want to reduce the rowsize by trimming the
value of dimensions that are orthogonal (and preferably close) to the
*leading* dimension of this object.""" % (object_, maxrowsize),
                    PerformanceWarning)

        return nrowsinbuf

    def _guess_shape(self):
        """Guess the shape of the output of the expression."""

        # First, compute the maximum dimension of inputs and maindim
        # (if it exists)
        maxndim = 0
        maindims = []
        for val in self.values:
            # Track the maximum number of dimensions
            if len(val.shape) > maxndim:
                maxndim = len(val.shape)
            if hasattr(val, "maindim"):
                maindims.append(val.maindim)
        if maxndim == 0:
            self._single_row_out = out = self._compiled_expr(*self.values)
            return (), None
        if maindims and [maindims[0]] * len(maindims) == maindims:
            # If all maindims detected are the same, use this as maindim
            maindim = maindims[0]
        else:
            # If not, the main dimension will be the default one
            maindim = 0

        # The slices parameter for inputs
        slices = (slice(None), ) * maindim + (0, )

        # Now, collect the values in first row of arrays with maximum dims
        vals = []
        lens = []
        for val in self.values:
            shape = val.shape
            # Warning: don't use len(val) below or it will raise an
            # ``OverflowError`` on 32-bit platforms for large enough arrays.
            if shape != () and shape[maindim] == 0:
                vals.append(val[:])
                lens.append(0)
            elif len(shape) < maxndim:
                vals.append(val)
            else:
                vals.append(val.__getitem__(slices))
                lens.append(shape[maindim])
        minlen = min(lens)
        self._single_row_out = out = self._compiled_expr(*vals)
        shape = list(out.shape)
        if minlen > 0:
            shape.insert(maindim, minlen)
        return shape, maindim

    def _get_info(self, shape, maindim, itermode=False):
        """Return various info needed for evaluating the computation loop."""

        # Compute the shape of the resulting container having
        # in account new possible values of start, stop and step in
        # the inputs range
        if maindim is not None:
            (start, stop, step) = get_indices(self.start, self.stop, self.step,
                                              shape[maindim])
            shape[maindim] = min(shape[maindim], len(xrange(start, stop,
                                                            step)))
            i_nrows = shape[maindim]
        else:
            start, stop, step = 0, 0, None
            i_nrows = 0

        if not itermode:
            # Create a container for output if not defined yet
            o_maindim = 0  # Default maindim
            if self.out is None:
                out = np.empty(shape, dtype=self._single_row_out.dtype)
                # Get the trivial values for start, stop and step
                if maindim is not None:
                    (o_start, o_stop, o_step) = (0, shape[maindim], 1)
                else:
                    (o_start, o_stop, o_step) = (0, 0, 1)
            else:
                out = self.out
                # Out container already provided.  Do some sanity checks.
                if hasattr(out, "maindim"):
                    o_maindim = out.maindim

                # Refine the shape of the resulting container having in
                # account new possible values of start, stop and step in
                # the output range
                o_shape = list(out.shape)
                (o_start, o_stop,
                 o_step) = get_indices(self.o_start, self.o_stop, self.o_step,
                                       o_shape[o_maindim])
                o_shape[o_maindim] = min(o_shape[o_maindim],
                                         len(xrange(o_start, o_stop, o_step)))

                # Check that the shape of output is consistent with inputs
                tr_oshape = list(o_shape)  # this implies a copy
                olen_ = tr_oshape.pop(o_maindim)
                tr_shape = list(shape)  # do a copy
                if maindim is not None:
                    len_ = tr_shape.pop(o_maindim)
                else:
                    len_ = 1
                if tr_oshape != tr_shape:
                    raise ValueError(
                        "Shape for out container does not match expression")
                # Force the input length to fit in `out`
                if not self.append_mode and olen_ < len_:
                    shape[o_maindim] = olen_
                    stop = start + olen_

        # Get the positions of inputs that should be sliced (the others
        # will be broadcasted)
        ndim = len(shape)
        slice_pos = [
            i for i, val in enumerate(self.values) if len(val.shape) == ndim
        ]

        # The size of the I/O buffer
        nrowsinbuf = 1
        for i, val in enumerate(self.values):
            # Skip scalar values in variables
            if i in slice_pos:
                nrows = self._calc_nrowsinbuf(val)
                if nrows > nrowsinbuf:
                    nrowsinbuf = nrows

        if not itermode:
            return (i_nrows, slice_pos, start, stop, step, nrowsinbuf, out,
                    o_maindim, o_start, o_stop, o_step)
        else:
            # For itermode, we don't need the out info
            return (i_nrows, slice_pos, start, stop, step, nrowsinbuf)

    def eval(self):
        """Evaluate the expression and return the outcome.

        For performance reasons, the computation order tries to go along
        the common main dimension of all inputs.  If no such common main
        dimension is found, the iteration will go along the leading dimension
        instead.

        For non-consistent shapes in inputs (i.e. shapes having a different
        number of dimensions), the regular NumPy broadcast rules apply.
        There is one exception to this rule though: when the dimensions
        orthogonal to the main dimension of the expression are consistent, but
        the main dimension itself differs among the inputs, then the shortest
        one is chosen for doing the computations.  This is so because trying to
        expand very large on-disk arrays could be too expensive or simply not
        possible.

        Also, the regular Numexpr casting rules (which are similar to those of
        NumPy, although you should check the Numexpr manual for the exceptions)
        are applied to determine the output type.

        Finally, if the set_output() method specifying a user container has
        already been called, the output is sent to this user-provided
        container.  If not, a fresh NumPy container is returned instead.

        .. warning::

            When dealing with large on-disk inputs, failing to specify an
            on-disk container may consume all your available memory.

        """

        values, shape, maindim = self.values, self.shape, self.maindim

        # Get different info we need for the main computation loop
        (i_nrows, slice_pos, start, stop, step, nrowsinbuf,
         out, o_maindim, o_start, o_stop, o_step) = \
            self._get_info(shape, maindim)

        if i_nrows == 0:
            # No elements to compute
            return self._single_row_out

        # Create a key that selects every element in inputs and output
        # (including the main dimension)
        i_slices = [slice(None)] * (maindim + 1)
        o_slices = [slice(None)] * (o_maindim + 1)

        # This is a hack to prevent doing unnecessary flavor conversions
        # while reading buffers
        for val in values:
            if hasattr(val, 'maindim'):
                val._v_convert = False

        # Start the computation itself
        for start2 in xrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice for inputs
            i_slices[maindim] = slice(start2, stop2, step)
            # Get the input values
            vals = []
            for i, val in enumerate(values):
                if i in slice_pos:
                    vals.append(val.__getitem__(tuple(i_slices)))
                else:
                    # A read of values is apparently not needed, as PyTables
                    # leaves seem to work just fine inside Numexpr
                    vals.append(val)
            # Do the actual computation for this slice
            rout = self._compiled_expr(*vals)
            # Set the values into the out buffer
            if self.append_mode:
                out.append(rout)
            else:
                # Compute the slice to be filled in output
                start3 = o_start + (start2 - start) // step
                stop3 = start3 + nrowsinbuf * o_step
                if stop3 > o_stop:
                    stop3 = o_stop
                o_slices[o_maindim] = slice(start3, stop3, o_step)
                # Set the slice
                out[tuple(o_slices)] = rout

        # Activate the conversion again (default)
        for val in values:
            if hasattr(val, 'maindim'):
                val._v_convert = True

        return out

    def __iter__(self):
        """Iterate over the rows of the outcome of the expression.

        This iterator always returns rows as NumPy objects, so a possible out
        container specified in :meth:`Expr.set_output` method is ignored here.

        """

        values, shape, maindim = self.values, self.shape, self.maindim

        # Get different info we need for the main computation loop
        (i_nrows, slice_pos, start, stop, step, nrowsinbuf) = \
            self._get_info(shape, maindim, itermode=True)

        if i_nrows == 0:
            # No elements to compute
            return

        # Create a key that selects every element in inputs
        # (including the main dimension)
        i_slices = [slice(None)] * (maindim + 1)

        # This is a hack to prevent doing unnecessary flavor conversions
        # while reading buffers
        for val in values:
            if hasattr(val, 'maindim'):
                val._v_convert = False

        # Start the computation itself
        for start2 in xrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            i_slices[maindim] = slice(start2, stop2, step)
            # Get the values for computing the buffer
            vals = []
            for i, val in enumerate(values):
                if i in slice_pos:
                    vals.append(val.__getitem__(tuple(i_slices)))
                else:
                    # A read of values is apparently not needed, as PyTables
                    # leaves seem to work just fine inside Numexpr
                    vals.append(val)
            # Do the actual computation
            rout = self._compiled_expr(*vals)
            # Return one row per call
            for row in rout:
                yield row

        # Activate the conversion again (default)
        for val in values:
            if hasattr(val, 'maindim'):
                val._v_convert = True
Example #15
0
class EArray(CArray):
    """This class represents extendable, homogeneous datasets in an HDF5 file.

    The main difference between an EArray and a CArray (see
    :ref:`CArrayClassDescr`), from which it inherits, is that the former
    can be enlarged along one of its dimensions, the *enlargeable
    dimension*.  That means that the :attr:`Leaf.extdim` attribute (see
    :class:`Leaf`) of any EArray instance will always be non-negative.
    Multiple enlargeable dimensions might be supported in the future.

    New rows can be added to the end of an enlargeable array by using the
    :meth:`EArray.append` method.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.

    atom
        An `Atom` instance representing the *type* and *shape*
        of the atomic objects to be saved.

    shape
        The shape of the new array.  One (and only one) of
        the shape dimensions *must* be 0.  The dimension being 0
        means that the resulting `EArray` object can be extended
        along it.  Multiple enlargeable dimensions are not supported
        right now.

    title
        A description for this node (it sets the ``TITLE``
        HDF5 attribute on disk).

    filters
        An instance of the `Filters` class that provides information
        about the desired I/O filters to be applied during the life
        of this object.

    expectedrows
        A user estimate about the number of row elements that will
        be added to the growable dimension in the `EArray` node.
        If not provided, the default value is ``EXPECTED_ROWS_EARRAY``
        (see ``tables/parameters.py``).  If you plan to create either
        a much smaller or a much bigger `EArray` try providing a guess;
        this will optimize the HDF5 B-Tree creation and management
        process time and the amount of memory used.

    chunkshape
        The shape of the data chunk to be read or written in a single
        HDF5 I/O operation.  Filters are applied to those chunks of data.
        The dimensionality of `chunkshape` must be the same as that of
        `shape` (beware: no dimension should be 0 this time!).
        If ``None``, a sensible value is calculated based on the
        `expectedrows` parameter (which is recommended).

    byteorder
        The byteorder of the data *on disk*, specified as 'little' or
        'big'. If this is not specified, the byteorder is that of the
        platform.

    Examples
    --------

    See below a small example of the use of the `EArray` class.  The
    code is available in ``examples/earray1.py``::

        import tables
        import numpy

        fileh = tables.open_file('earray1.h5', mode='w')
        a = tables.StringAtom(itemsize=8)

        # Use ``a`` as the object type for the enlargeable array.
        array_c = fileh.create_earray(fileh.root, 'array_c', a, (0,),
                                      "Chars")
        array_c.append(numpy.array(['a'*2, 'b'*4], dtype='S8'))
        array_c.append(numpy.array(['a'*6, 'b'*8, 'c'*10], dtype='S8'))

        # Read the string ``EArray`` we have created on disk.
        for s in array_c:
            print('array_c[%s] => %r' % (array_c.nrow, s))
        # Close the file.
        fileh.close()

    The output for the previous script is something like::

        array_c[0] => 'aa'
        array_c[1] => 'bbbb'
        array_c[2] => 'aaaaaa'
        array_c[3] => 'bbbbbbbb'
        array_c[4] => 'cccccccc'

    """

    # Class identifier.
    _c_classid = 'EARRAY'

    _c_classId = previous_api_property('_c_classid')

    # Special methods
    # ~~~~~~~~~~~~~~~
    def __init__(self, parentnode, name,
                 atom=None, shape=None, title="",
                 filters=None, expectedrows=None,
                 chunkshape=None, byteorder=None,
                 _log=True):

        # Specific of EArray
        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_EARRAY']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the array."""

        # Call the parent (CArray) init code
        super(EArray, self).__init__(parentnode, name, atom, shape, title,
                                     filters, chunkshape, byteorder, _log)

    # Public and private methods
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~
    def _g_create(self):
        """Create a new array in file (specific part)."""

        # Pre-conditions and extdim computation
        zerodims = numpy.sum(numpy.array(self.shape) == 0)
        if zerodims > 0:
            if zerodims == 1:
                self.extdim = list(self.shape).index(0)
            else:
                raise NotImplementedError(
                    "Multiple enlargeable (0-)dimensions are not "
                    "supported.")
        else:
            raise ValueError(
                "When creating EArrays, you need to set one of "
                "the dimensions of the Atom instance to zero.")

        # Finish the common part of the creation process
        return self._g_create_common(self._v_expectedrows)

    def _check_shape_append(self, nparr):
        "Test that nparr shape is consistent with underlying EArray."

        # Does the array conform to self's expandability?
        myrank = len(self.shape)
        narank = len(nparr.shape) - len(self.atom.shape)
        if myrank != narank:
            raise ValueError(("the ranks of the appended object (%d) and the "
                              "``%s`` EArray (%d) differ")
                             % (narank, self._v_pathname, myrank))
        for i in range(myrank):
            if i != self.extdim and self.shape[i] != nparr.shape[i]:
                raise ValueError(("the shapes of the appended object and the "
                                  "``%s`` EArray differ in non-enlargeable "
                                  "dimension %d") % (self._v_pathname, i))

    _checkShapeAppend = previous_api(_check_shape_append)

    def append(self, sequence):
        """Add a sequence of data to the end of the dataset.

        The sequence must have the same type as the array; otherwise a
        TypeError is raised. In the same way, the dimensions of the
        sequence must conform to the shape of the array, that is, all
        dimensions must match, with the exception of the enlargeable
        dimension, which can be of any length (even 0!).  If the shape
        of the sequence is invalid, a ValueError is raised.

        """

        self._g_check_open()
        self._v_file._check_writable()

        # Convert the sequence into a NumPy object
        nparr = convert_to_np_atom2(sequence, self.atom)
        # Check if it has a consistent shape with underlying EArray
        self._check_shape_append(nparr)
        # If the size of the nparr is zero, don't do anything else
        if nparr.size > 0:
            self._append(nparr)

    def _g_copy_with_stats(self, group, name, start, stop, step,
                           title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf."""

        (start, stop, step) = self._process_range_read(start, stop, step)
        # Build the new EArray object
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = 0
        # The number of final rows
        nrows = len(xrange(0, stop - start, step))
        # Build the new EArray object
        object = EArray(
            group, name, atom=self.atom, shape=shape, title=title,
            filters=filters, expectedrows=nrows, chunkshape=chunkshape,
            _log=_log)
        # Now, fill the new earray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        # Start the copy itself
        for start2 in xrange(start, stop, step * nrowsinbuf):
            # Save the records on disk
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the extensible dimension
            slices[maindim] = slice(start2, stop2, step)
            object._append(self.__getitem__(tuple(slices)))
        # Activate the conversion again (default)
        self._v_convert = True
        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.itemsize

        return (object, nbytes)

    _g_copyWithStats = previous_api(_g_copy_with_stats)
Example #16
0
    if keyword.iskeyword(name):
        warnings.warn("object name is a Python keyword: %r; %s"
                      % (name, warnInfo), NaturalNameWarning)
        return

    # Still, names starting with reserved prefixes are not allowed.
    if _reserved_id_re.match(name):
        raise ValueError("object name starts with a reserved prefix: %r; "
                         "it matches the pattern ``%s``"
                         % (name, _reserved_id_re.pattern))

    # ``__members__`` is the only exception to that rule.
    if name == '__members__':
        raise ValueError("``__members__`` is not allowed as an object name")

checkNameValidity = previous_api(check_name_validity)


def join_path(parentpath, name):
    """Join a *canonical* `parentpath` with a *non-empty* `name`.

    .. versionchanged:: 3.0
       The *parentPath* parameter has been renamed into *parentpath*.

    >>> join_path('/', 'foo')
    '/foo'
    >>> join_path('/foo', 'bar')
    '/foo/bar'
    >>> join_path('/foo', '/foo2/bar')
    '/foo/foo2/bar'
    >>> join_path('/foo', '/')
Example #17
0
        nrowscopied = SizeType(0)
        nbytes = 0
        if not hasattr(self.atom, 'size'):  # it is a pseudo-atom
            atomsize = self.atom.base.size
        else:
            atomsize = self.atom.size
        for start2 in xrange(start, stop, step * nrowsinbuf):
            # Save the records on disk
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            nparr = self._read_array(start=start2, stop=stop2, step=step)[0]
            nobjects = nparr.shape[0]
            object._append(nparr, nobjects)
            nbytes += nobjects * atomsize
            nrowscopied += 1
        object.nrows = nrowscopied
        return (object, nbytes)

    _g_copyWithStats = previous_api(_g_copy_with_stats)

    def __repr__(self):
        """This provides more metainfo in addition to standard __str__"""

        return """%s
  atom = %r
  byteorder = %r
  nrows = %s
  flavor = %r""" % (self, self.atom, self.byteorder, self.nrows,
                    self.flavor)
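
# A minimal round trip for the copy logic above, assuming this fragment is
# ``VLArray._g_copy_with_stats`` (file and node names are hypothetical; note
# that ``_append(nparr, nobjects)`` takes an explicit object count):
#
#     import tables
#     with tables.open_file('vl.h5', 'w') as h5f:
#         vla = h5f.create_vlarray('/', 'vla', tables.Int32Atom())
#         vla.append([1, 2, 3])         # row 0: three objects
#         vla.append([4])               # row 1: one object
#         copy = vla.copy('/', 'vla2')  # exercises _g_copy_with_stats()
#         print(copy.nrows)             # -> 2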
Example #18
0
class CArray(Array):
    """This class represents homogeneous datasets in an HDF5 file.

    The difference between a CArray and a normal Array (see
    :ref:`ArrayClassDescr`), from which it inherits, is that a CArray
    has a chunked layout and, as a consequence, it supports compression.
    You can use datasets of this class to easily save or load arrays to
    or from disk, with compression support included.

    CArray includes all the instance variables and methods of Array.
    Only those with different behavior are mentioned here.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
       An `Atom` instance representing the *type* and *shape* of
       the atomic objects to be saved.

    shape
       The shape of the new array.

    title
       A description for this node (it sets the ``TITLE`` HDF5
       attribute on disk).

    filters
       An instance of the `Filters` class that provides
       information about the desired I/O filters to be applied
       during the life of this object.

    chunkshape
       The shape of the data chunk to be read or written in a
       single HDF5 I/O operation.  Filters are applied to those
       chunks of data.  The dimensionality of `chunkshape` must
       be the same as that of `shape`.  If ``None``, a sensible
       value is calculated (which is recommended).

    byteorder
        The byteorder of the data *on disk*, specified as 'little'
        or 'big'.  If this is not specified, the byteorder is that
        of the platform.

    Examples
    --------

    Below is a small example of the use of the `CArray` class.
    The code is available in ``examples/carray1.py``::

        import numpy
        import tables

        fileName = 'carray1.h5'
        shape = (200, 300)
        atom = tables.UInt8Atom()
        filters = tables.Filters(complevel=5, complib='zlib')

        h5f = tables.open_file(fileName, 'w')
        ca = h5f.create_carray(h5f.root, 'carray', atom, shape,
                               filters=filters)

        # Fill a hyperslab in ``ca``.
        ca[10:60, 20:70] = numpy.ones((50, 50))
        h5f.close()

        # Re-open and read another hyperslab
        h5f = tables.open_file(fileName)
        print(h5f)
        print(h5f.root.carray[8:12, 18:22])
        h5f.close()

    The output for the previous script is something like::

        carray1.h5 (File) ''
        Last modif.: 'Thu Apr 12 10:15:38 2007'
        Object Tree:
        / (RootGroup) ''
        /carray (CArray(200, 300), shuffle, zlib(5)) ''

        [[0 0 0 0]
         [0 0 0 0]
         [0 0 1 1]
         [0 0 1 1]]

    """

    # Class identifier.
    _c_classid = 'CARRAY'

    _c_classId = previous_api_property('_c_classid')

    # Properties
    # ~~~~~~~~~~
    # Special methods
    # ~~~~~~~~~~~~~~~
    def __init__(self,
                 parentnode,
                 name,
                 atom=None,
                 shape=None,
                 title="",
                 filters=None,
                 chunkshape=None,
                 byteorder=None,
                 _log=True):

        self.atom = atom
        """An `Atom` instance representing the shape, type of the atomic
        objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.extdim = -1  # `CArray` objects are not enlargeable by default
        """The index of the enlargeable dimension."""

        # Other private attributes
        self._v_version = None
        """The object version of this array."""
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""
        self._v_chunkshape = chunkshape
        """Private storage for the `chunkshape` property of the leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer. """
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        if new:
            if not isinstance(atom, Atom):
                raise ValueError("atom parameter should be an instance of "
                                 "tables.Atom and you passed a %s." %
                                 type(atom))
            if shape is None:
                raise ValueError("you must specify a non-empty shape")
            try:
                shape = tuple(shape)
            except TypeError:
                raise TypeError("`shape` parameter must be a sequence "
                                "and you passed a %s" % type(shape))
            self.shape = tuple(SizeType(s) for s in shape)

            if chunkshape is not None:
                try:
                    chunkshape = tuple(chunkshape)
                except TypeError:
                    raise TypeError(
                        "`chunkshape` parameter must be a sequence "
                        "and you passed a %s" % type(chunkshape))
                if len(shape) != len(chunkshape):
                    raise ValueError("the shape (%s) and chunkshape (%s) "
                                     "ranks must be equal." %
                                     (shape, chunkshape))
                elif min(chunkshape) < 1:
                    raise ValueError("chunkshape parameter cannot have "
                                     "zero-dimensions.")
                self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        # The `Array` class is not abstract enough! :(
        super(Array, self).__init__(parentnode, name, new, filters, byteorder,
                                    _log)

    def _g_create(self):
        """Create a new array in file (specific part)."""

        if min(self.shape) < 1:
            raise ValueError("shape parameter cannot have zero-dimensions.")
        # Finish the common part of creation process
        return self._g_create_common(self.nrows)

    def _g_create_common(self, expectedrows):
        """Create a new array in file (common part)."""

        self._v_version = obversion

        if self._v_chunkshape is None:
            # Compute the optimal chunk size
            self._v_chunkshape = self._calc_chunkshape(expectedrows,
                                                       self.rowsize,
                                                       self.atom.size)
        # Compute the optimal nrowsinbuf
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(self.atom.type, sys.byteorder)

        try:
            # ``self._v_objectid`` needs to be set because it will be
            # needed when setting attributes in some descendants later
            # on.
            self._v_objectid = self._create_carray(self._v_new_title)
        except:  # XXX
            # Problems creating the Array on disk. Close node and re-raise.
            self.close(flush=0)
            raise

        return self._v_objectid

    def _g_copy_with_stats(self, group, name, start, stop, step, title,
                           filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf."""

        (start, stop, step) = self._process_range_read(start, stop, step)
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = len(xrange(0, stop - start, step))
        # Now, fill the new carray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        # Build the new CArray object
        object = CArray(group,
                        name,
                        atom=self.atom,
                        shape=shape,
                        title=title,
                        filters=filters,
                        chunkshape=chunkshape,
                        _log=_log)
        # Start the copy itself
        for start2 in xrange(start, stop, step * nrowsinbuf):
            # Save the records on disk
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # The next line should be generalised if, in the future,
            # maindim is designed to be different from 0 in CArrays.
            # See ticket #199.
            object[start3:stop3] = self.__getitem__(tuple(slices))
        # Activate the conversion again (default)
        self._v_convert = True
        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size

        return (object, nbytes)

    _g_copyWithStats = previous_api(_g_copy_with_stats)
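
# A short sketch of explicit vs. automatic chunkshape (hypothetical file and
# node names; ``chunkshape=None`` triggers the ``_calc_chunkshape()`` call in
# ``_g_create_common()`` above):
#
#     import tables
#     with tables.open_file('chunks.h5', 'w') as h5f:
#         auto = h5f.create_carray('/', 'auto', tables.Float64Atom(),
#                                  shape=(1000, 1000))
#         manual = h5f.create_carray('/', 'manual', tables.Float64Atom(),
#                                    shape=(1000, 1000), chunkshape=(100, 100))
#         print(auto.chunkshape)    # computed automatically
#         print(manual.chunkshape)  # -> (100, 100)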
Example #19
0
import tables as t
from tables import linkextension
from tables.node import Node
from tables.utils import lazyattr
from tables.attributeset import AttributeSet
import tables.file
from tables._past import previous_api, previous_api_property


def _g_get_link_class(parent_id, name):
    """Guess the link class."""

    return linkextension._get_link_class(parent_id, name)


_g_getLinkClass = previous_api(_g_get_link_class)


class Link(Node):
    """Abstract base class for all PyTables links.

    A link is a node that refers to another node.  The Link class inherits
    from the Node class, and the classes that inherit from Link are SoftLink
    and ExternalLink.  There is no HardLink subclass because hard links behave
    just like regular Groups or Leaves.  Contrary to other nodes, links cannot
    have HDF5 attributes.  This is an HDF5 library limitation that might be
    solved in future releases.

    See :ref:`LinksTutorial` for a small tutorial on how to work with links.

    .. rubric:: Link attributes
Example #20
0
import os
import tables
from tables import linkextension
from tables.node import Node
from tables.utils import lazyattr
from tables.attributeset import AttributeSet
import tables.file
from tables._past import previous_api, previous_api_property


def _g_get_link_class(parent_id, name):
    """Guess the link class."""

    return linkextension._get_link_class(parent_id, name)

_g_getLinkClass = previous_api(_g_get_link_class)


class Link(Node):
    """Abstract base class for all PyTables links.

    A link is a node that refers to another node.  The Link class inherits
    from the Node class, and the classes that inherit from Link are SoftLink
    and ExternalLink.  There is no HardLink subclass because hard links behave
    just like regular Groups or Leaves.  Contrary to other nodes, links cannot
    have HDF5 attributes.  This is an HDF5 library limitation that might be
    solved in future releases.

    See :ref:`LinksTutorial` for a small tutorial on how to work with links.

    .. rubric:: Link attributes
Example #21
0
class Leaf(Node):
    """Abstract base class for all PyTables leaves.

    A leaf is a node (see the Node class in :class:`Node`) which hangs from a
    group (see the Group class in :class:`Group`) but, unlike a group, it
    cannot have any further children below it (i.e. it is an end node).

    This definition includes all nodes which contain actual data (datasets
    handled by the Table - see :ref:`TableClassDescr`, Array -
    see :ref:`ArrayClassDescr`, CArray - see :ref:`CArrayClassDescr`, EArray -
    see :ref:`EArrayClassDescr`, and VLArray - see :ref:`VLArrayClassDescr`
    classes) and unsupported nodes (the UnImplemented
    class - :ref:`UnImplementedClassDescr`); all these classes do in fact
    inherit from Leaf.


    .. rubric:: Leaf attributes

    These instance variables are provided in addition to those in Node
    (see :ref:`NodeClassDescr`):

    .. attribute:: byteorder

        The byte ordering of the leaf data *on disk*.  It will be either
        ``little`` or ``big``.

    .. attribute:: dtype

        The NumPy dtype that most closely matches this leaf type.

    .. attribute:: extdim

        The index of the enlargeable dimension (-1 if none).

    .. attribute:: nrows

        The length of the main dimension of the leaf data.

    .. attribute:: nrowsinbuf

        The number of rows that fit in internal input buffers.

        You can change this to fine-tune the speed or memory
        requirements of your application.

    .. attribute:: shape

        The shape of data in the leaf.

    """

    # Properties
    # ~~~~~~~~~~

    # Node property aliases
    # `````````````````````
    # These are a little hard to override, but so are properties.
    attrs = Node._v_attrs
    """The associated AttributeSet instance - see :ref:`AttributeSetClassDescr`
    (This is an easier-to-write alias of :attr:`Node._v_attrs`."""
    title = Node._v_title
    """A description for this node
    (This is an easier-to-write alias of :attr:`Node._v_title`)."""

    # Read-only node property aliases
    # ```````````````````````````````
    name = property(
        lambda self: self._v_name, None, None,
        """The name of this node in its parent group
        (This is an easier-to-write alias of :attr:`Node._v_name`).""")

    chunkshape = property(
        lambda self: getattr(self, '_v_chunkshape', None), None, None,
        """The HDF5 chunk size for chunked leaves (a tuple).

        This is read-only because you cannot change the chunk size of a
        leaf once it has been created.
        """)

    object_id = property(
        lambda self: self._v_objectid, None, None,
        """A node identifier, which may change from run to run.
        (This is an easier-to-write alias of :attr:`Node._v_objectid`).

        .. versionchanged:: 3.0
           The *objectID* property has been renamed into *object_id*.

        """)

    objectID = previous_api(object_id)

    ndim = property(
        lambda self: len(self.shape), None, None,
        """The number of dimensions of the leaf data.

        .. versionadded:: 2.4""")

    # Lazy read-only attributes
    # `````````````````````````
    @lazyattr
    def filters(self):
        """Filter properties for this leaf.

        See Also
        --------
        Filters

        """

        return Filters._from_leaf(self)

    # Other properties
    # ````````````````
    def _getmaindim(self):
        if self.extdim < 0:
            return 0  # choose the first dimension
        return self.extdim

    maindim = property(
        _getmaindim, None, None, """The dimension along which iterators work.

        Its value is 0 (i.e. the first dimension) when the dataset is not
        extendable, and self.extdim (where available) for extendable ones.
        """)

    def _setflavor(self, flavor):
        self._v_file._check_writable()
        check_flavor(flavor)
        self._v_attrs.FLAVOR = self._flavor = flavor  # logs the change

    def _delflavor(self):
        del self._v_attrs.FLAVOR
        self._flavor = internal_flavor

    flavor = property(
        lambda self: self._flavor, _setflavor, _delflavor,
        """The type of data object read from this leaf.

        It can be either 'numpy' or 'python'.

        You can (and are encouraged to) use this property to get, set
        and delete the FLAVOR HDF5 attribute of the leaf. When the leaf
        has no such attribute, the default flavor is used.
        """)

    size_on_disk = property(
        lambda self: self._get_storage_size(), None, None, """
        The size of this leaf's data in bytes as it is stored on disk.  If the
        data is compressed, this shows the compressed size.  In the case of
        uncompressed, chunked data, this may be slightly larger than the amount
        of data, due to partially filled chunks.
        """)

    # Special methods
    # ~~~~~~~~~~~~~~~
    def __init__(self,
                 parentnode,
                 name,
                 new=False,
                 filters=None,
                 byteorder=None,
                 _log=True):
        self._v_new = new
        """Is this the first time the node has been created?"""
        self.nrowsinbuf = None
        """
        The number of rows that fit in internal input buffers.

        You can change this to fine-tune the speed or memory
        requirements of your application.
        """
        self._flavor = None
        """Private storage for the `flavor` property."""

        if new:
            # Get filter properties from parent group if not given.
            if filters is None:
                filters = parentnode._v_filters
            self.__dict__['filters'] = filters  # bypass the property

            if byteorder not in (None, 'little', 'big'):
                raise ValueError(
                    "the byteorder can only take 'little' or 'big' values "
                    "and you passed: %s" % byteorder)
            self.byteorder = byteorder
            """The byte ordering of the leaf data *on disk*."""

        # Existing filters need not be read since `filters`
        # is a lazy property that automatically handles their loading.

        super(Leaf, self).__init__(parentnode, name, _log)

    def __len__(self):
        """Return the length of the main dimension of the leaf data.

        Please note that this may raise an OverflowError on 32-bit platforms
        for datasets having more than 2**31-1 rows.  This is a limitation of
        Python that you can work around by using the nrows or shape attributes.

        """

        return self.nrows

    def __str__(self):
        """The string representation for this object is its pathname in the
        HDF5 object tree plus some additional metainfo."""

        # Get this class name
        classname = self.__class__.__name__
        # The title
        title = self._v_title
        # The filters
        filters = ""
        if self.filters.fletcher32:
            filters += ", fletcher32"
        if self.filters.complevel:
            if self.filters.shuffle:
                filters += ", shuffle"
            filters += ", %s(%s)" % (self.filters.complib,
                                     self.filters.complevel)
        return "%s (%s%s%s) %r" % \
               (self._v_pathname, classname, self.shape, filters, title)

    # Private methods
    # ~~~~~~~~~~~~~~~
    def _g_post_init_hook(self):
        """Code to be run after node creation and before creation logging.

        This method gets or sets the flavor of the leaf.

        """

        super(Leaf, self)._g_post_init_hook()
        if self._v_new:  # set flavor of new node
            if self._flavor is None:
                self._flavor = internal_flavor
            else:  # flavor set at creation time, do not log
                if self._v_file.params['PYTABLES_SYS_ATTRS']:
                    self._v_attrs._g__setattr('FLAVOR', self._flavor)
        else:  # get flavor of existing node (if any)
            if self._v_file.params['PYTABLES_SYS_ATTRS']:
                flavor = getattr(self._v_attrs, 'FLAVOR', internal_flavor)
                self._flavor = flavor_alias_map.get(flavor, flavor)
            else:
                self._flavor = internal_flavor

    _g_postInitHook = previous_api(_g_post_init_hook)

    def _calc_chunkshape(self, expectedrows, rowsize, itemsize):
        """Calculate the shape for the HDF5 chunk."""

        # In case of a scalar shape, return the unit chunksize
        if self.shape == ():
            return (SizeType(1), )

        # Compute the chunksize
        MB = 1024 * 1024
        expected_mb = (expectedrows * rowsize) // MB
        chunksize = calc_chunksize(expected_mb)

        maindim = self.maindim
        # Compute the chunknitems
        chunknitems = chunksize // itemsize
        # Safeguard against itemsizes being extremely large
        if chunknitems == 0:
            chunknitems = 1
        chunkshape = list(self.shape)
        # Check whether trimming the main dimension is enough
        chunkshape[maindim] = 1
        newchunknitems = numpy.prod(chunkshape, dtype=SizeType)
        if newchunknitems <= chunknitems:
            chunkshape[maindim] = chunknitems // newchunknitems
        else:
            # No, so start trimming other dimensions as well
            for j in xrange(len(chunkshape)):
                # Check whether trimming this dimension is enough
                chunkshape[j] = 1
                newchunknitems = numpy.prod(chunkshape, dtype=SizeType)
                if newchunknitems <= chunknitems:
                    chunkshape[j] = chunknitems // newchunknitems
                    break
            else:
                # Oops, we ran out of the loop without a break
                # Set the last dimension to chunknitems
                chunkshape[-1] = chunknitems

        return tuple(SizeType(s) for s in chunkshape)
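
    # Worked example of the trimming logic above.  The 8 KB chunksize is an
    # assumption for illustration (``calc_chunksize()`` is not shown here).
    # Suppose shape=(1000, 50, 20), maindim=0 and itemsize=8:
    #
    #     chunknitems = 8192 // 8        # -> 1024 items per chunk
    #     chunkshape = [1, 50, 20]       # trim the main dimension first
    #     newchunknitems = 1 * 50 * 20   # -> 1000 <= 1024, so this is enough
    #     chunkshape[0] = 1024 // 1000   # -> 1
    #     # final chunkshape: (1, 50, 20)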

    def _calc_nrowsinbuf(self):
        """Calculate the number of rows that fits on a PyTables buffer."""

        params = self._v_file.params
        # Compute the nrowsinbuf
        rowsize = self.rowsize
        buffersize = params['IO_BUFFER_SIZE']
        if rowsize != 0:
            nrowsinbuf = buffersize // rowsize
        else:
            nrowsinbuf = 1

        # Safeguard against row sizes being extremely large
        if nrowsinbuf == 0:
            nrowsinbuf = 1
            # If rowsize is too large, issue a Performance warning
            maxrowsize = params['BUFFER_TIMES'] * buffersize
            if rowsize > maxrowsize:
                warnings.warn(
                    """\
The Leaf ``%s`` is exceeding the maximum recommended rowsize (%d bytes);
be ready to see PyTables asking for *lots* of memory and possibly slow
I/O.  You may want to reduce the rowsize by trimming the value of
dimensions that are orthogonal (and preferably close) to the *main*
dimension of this leaf.  Alternatively, in case you have specified a
very small/large chunksize, you may want to increase/decrease it.""" %
                    (self._v_pathname, maxrowsize), PerformanceWarning)
        return nrowsinbuf

    # This method is appropriate for calls to __getitem__ methods
    def _process_range(self, start, stop, step, dim=None, warn_negstep=True):
        if dim is None:
            nrows = self.nrows  # self.shape[self.maindim]
        else:
            nrows = self.shape[dim]

        if warn_negstep and step and step < 0:
            raise ValueError("slice step cannot be negative")

        #if start is not None: start = long(start)
        #if stop is not None: stop = long(stop)
        #if step is not None: step = long(step)

        return slice(start, stop, step).indices(long(nrows))

    _processRange = previous_api(_process_range)
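
    # ``slice(...).indices(nrows)`` above does the actual clipping and
    # defaulting; a few illustrative values:
    #
    #     slice(None, None, None).indices(10)  # -> (0, 10, 1)
    #     slice(-3, None, None).indices(10)    # -> (7, 10, 1)
    #     slice(2, 100, 5).indices(10)         # -> (2, 10, 5)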

    # This method is appropriate for calls to read() methods
    def _process_range_read(self, start, stop, step, warn_negstep=True):
        nrows = self.nrows
        if start is not None and stop is None and step is None:
            # Protection against start greater than available records
            # nrows == 0 is a special case for empty objects
            if nrows > 0 and start >= nrows:
                raise IndexError("start of range (%s) is greater than "
                                 "number of rows (%s)" % (start, nrows))
            step = 1
            if start == -1:  # corner case
                stop = nrows
            else:
                stop = start + 1
        # Finally, get the correct values (over the main dimension)
        start, stop, step = self._process_range(start,
                                                stop,
                                                step,
                                                warn_negstep=warn_negstep)
        return (start, stop, step)

    _processRangeRead = previous_api(_process_range_read)

    def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs):
        # Compute default arguments.
        start = kwargs.pop('start', None)
        stop = kwargs.pop('stop', None)
        step = kwargs.pop('step', None)
        title = kwargs.pop('title', self._v_title)
        filters = kwargs.pop('filters', self.filters)
        chunkshape = kwargs.pop('chunkshape', self.chunkshape)
        copyuserattrs = kwargs.pop('copyuserattrs', True)
        stats = kwargs.pop('stats', None)
        if chunkshape == 'keep':
            chunkshape = self.chunkshape  # Keep the original chunkshape
        elif chunkshape == 'auto':
            chunkshape = None  # Will recompute chunkshape

        # Fix arguments with explicit None values for backwards compatibility.
        if title is None:
            title = self._v_title
        if filters is None:
            filters = self.filters

        # Create a copy of the object.
        (new_node, bytes) = self._g_copy_with_stats(newparent, newname, start,
                                                    stop, step, title, filters,
                                                    chunkshape, _log, **kwargs)

        # Copy user attributes if requested (or the flavor at least).
        if copyuserattrs:
            self._v_attrs._g_copy(new_node._v_attrs, copyclass=True)
        elif 'FLAVOR' in self._v_attrs:
            if self._v_file.params['PYTABLES_SYS_ATTRS']:
                new_node._v_attrs._g__setattr('FLAVOR', self._flavor)
        new_node._flavor = self._flavor  # update cached value

        # Update statistics if needed.
        if stats is not None:
            stats['leaves'] += 1
            stats['bytes'] += bytes

        return new_node

    def _g_fix_byteorder_data(self, data, dbyteorder):
        "Fix the byteorder of data passed in constructors."
        dbyteorder = byteorders[dbyteorder]
        # If self.byteorder has not been passed as an argument of
        # the constructor, then set it to the same value of data.
        if self.byteorder is None:
            self.byteorder = dbyteorder
        # Do an additional in-place byteswap of data if the in-memory
        # byteorder doesn't match the on-disk one.  This is the only
        # place where we have to do the conversion manually.  In all the
        # other cases, HDF5 will be responsible for doing the byteswap
        # properly.
        if dbyteorder in ['little', 'big']:
            if dbyteorder != self.byteorder:
                # if data is not writeable, do a copy first
                if not data.flags.writeable:
                    data = data.copy()
                data.byteswap(True)
        else:
            # Fix the byteorder again, no matter which byteorder the
            # user has specified in the constructor.
            self.byteorder = "irrelevant"
        return data
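
    # The in-place swap above relies on ``ndarray.byteswap(True)``; a pure
    # NumPy illustration:
    #
    #     a = numpy.array([1], dtype='<i4')
    #     a.byteswap(True)   # bytes reversed in place, dtype unchanged
    #     int(a[0])          # -> 16777216 (0x01000000)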

    def _point_selection(self, key):
        """Perform a point-wise selection.

        `key` can be any of the following items:

        * A boolean array with the same shape as self. Those positions
          with True values will signal the coordinates to be returned.

        * A numpy array (or list or tuple) with the point coordinates.
          This has to be a two-dimensional array of size len(self.shape)
          by num_elements containing a list of zero-based values
          specifying the coordinates in the dataset of the selected
          elements. The order of the element coordinates in the array
          specifies the order in which the array elements are iterated
          through when I/O is performed. Duplicate coordinate locations
          are not checked for.

        Return the coordinates array.  If this is not possible, raise a
        `TypeError` so that the next selection method can be tried out.

        This is useful for any `Leaf` instance implementing a
        point-wise selection.

        """

        if type(key) in (list, tuple):
            if isinstance(key, tuple) and len(key) > len(self.shape):
                raise IndexError("Invalid index or slice: %r" % (key, ))
            # Try to convert key to a numpy array.  If not possible,
            # a TypeError will be issued (to be caught later on).
            try:
                key = numpy.array(key)
            except ValueError:
                raise TypeError("Invalid index or slice: %r" % (key, ))
        elif not isinstance(key, numpy.ndarray):
            raise TypeError("Invalid index or slice: %r" % (key, ))

        # Protection against empty keys
        if len(key) == 0:
            return numpy.array([], dtype="i8")

        if key.dtype.kind == 'b':
            if not key.shape == self.shape:
                raise IndexError(
                    "Boolean indexing array has incompatible shape")
            # Get the True coordinates (64-bit indices!)
            coords = numpy.asarray(key.nonzero(), dtype='i8')
            coords = numpy.transpose(coords)
        elif key.dtype.kind == 'i' or key.dtype.kind == 'u':
            if len(key.shape) > 2:
                raise IndexError(
                    "Coordinate indexing array has incompatible shape")
            elif len(key.shape) == 2:
                if key.shape[0] != len(self.shape):
                    raise IndexError(
                        "Coordinate indexing array has incompatible shape")
                coords = numpy.asarray(key, dtype="i8")
                coords = numpy.transpose(coords)
            else:
                # For 1-dimensional datasets
                coords = numpy.asarray(key, dtype="i8")

            # handle negative indices
            idx = coords < 0
            coords[idx] = (coords + self.shape)[idx]

            # bounds check
            if numpy.any(coords < 0) or numpy.any(coords >= self.shape):
                raise IndexError("Index out of bounds")
        else:
            raise TypeError("Only integer coordinates allowed.")
        # We absolutely need a contiguous array
        if not coords.flags.contiguous:
            coords = coords.copy()
        return coords

    _pointSelection = previous_api(_point_selection)
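
    # Both selection styles accepted above, sketched on a hypothetical 2x2
    # array node ``arr`` holding [[1, 2], [3, 4]]:
    #
    #     arr[arr[:] > 2]                      # boolean mask -> [3, 4]
    #     arr[numpy.array([[0, 1], [0, 1]])]   # points (0, 0) and (1, 1)
    #                                          # -> [1, 4]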

    # Public methods
    # ~~~~~~~~~~~~~~
    # Tree manipulation
    # `````````````````
    def remove(self):
        """Remove this node from the hierarchy.

        This method has the behavior described
        in :meth:`Node._f_remove`. Please note that there is no recursive flag
        since leaves do not have child nodes.

        """

        self._f_remove(False)

    def rename(self, newname):
        """Rename this node in place.

        This method has the behavior described in :meth:`Node._f_rename()`.

        """

        self._f_rename(newname)

    def move(self,
             newparent=None,
             newname=None,
             overwrite=False,
             createparents=False):
        """Move or rename this node.

        This method has the behavior described in :meth:`Node._f_move`

        """

        self._f_move(newparent, newname, overwrite, createparents)

    def copy(self,
             newparent=None,
             newname=None,
             overwrite=False,
             createparents=False,
             **kwargs):
        """Copy this node and return the new one.

        This method has the behavior described in :meth:`Node._f_copy`. Please
        note that there is no recursive flag since leaves do not have child
        nodes.

        .. warning::

            Note that unknown parameters passed to this method will be
            ignored, so you may want to double-check their spelling
            (i.e. if you write them incorrectly, they will most probably
            be ignored).

        Parameters
        ----------
        title
            The new title for the destination. If omitted or None, the original
            title is used.
        filters : Filters
            Specifying this parameter overrides the original filter properties
            in the source node. If specified, it must be an instance of the
            Filters class (see :ref:`FiltersClassDescr`). The default is to
            copy the filter properties from the source node.
        copyuserattrs
            You can prevent the user attributes from being copied by setting
            this parameter to False. The default is to copy them.
        start, stop, step : int
            Specify the range of rows to be copied; the default is to copy all
            the rows.
        stats
            This argument may be used to collect statistics on the copy
            process. When used, it should be a dictionary with keys 'groups',
            'leaves' and 'bytes' having a numeric value. Their values will be
            incremented to reflect the number of groups, leaves and bytes,
            respectively, that have been copied during the operation.
        chunkshape
            The chunkshape of the new leaf.  It supports a couple of special
            values.  A value of keep means that the chunkshape will be the same
            as in the original leaf (this is the default).  A value of auto means
            that a new shape will be computed automatically in order to ensure
            best performance when accessing the dataset through the main
            dimension.  Any other value should be an integer or a tuple
            matching the dimensions of the leaf.

        """

        return self._f_copy(newparent, newname, overwrite, createparents,
                            **kwargs)
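
    # A hypothetical call that recomputes the chunkshape and collects copy
    # statistics (names assumed for illustration):
    #
    #     stats = {'groups': 0, 'leaves': 0, 'bytes': 0}
    #     node.copy('/backup', 'node2', createparents=True,
    #               chunkshape='auto', stats=stats)
    #     print(stats['leaves'], stats['bytes'])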

    def truncate(self, size):
        """Truncate the main dimension to be size rows.

        If the main dimension previously was larger than this size, the extra
        data is lost.  If the main dimension previously was shorter, it is
        extended, and the extended part is filled with the default values.

        The truncation operation can only be applied to *enlargeable* datasets,
        otherwise a TypeError will be raised.

        """

        # Non-enlargeable arrays (Array, CArray) cannot be truncated
        if self.extdim < 0:
            raise TypeError("non-enlargeable datasets cannot be truncated")
        self._g_truncate(size)
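
    # E.g. on an enlargeable EArray (hypothetical file and node names):
    #
    #     ea = h5f.create_earray('/', 'ea', tables.Int32Atom(), shape=(0,))
    #     ea.append(range(10))
    #     ea.truncate(3)
    #     ea.nrows   # -> 3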

    def isvisible(self):
        """Is this node visible?

        This method has the behavior described in :meth:`Node._f_isvisible()`.

        """

        return self._f_isvisible()

    isVisible = previous_api(isvisible)

    # Attribute handling
    # ``````````````````
    def get_attr(self, name):
        """Get a PyTables attribute from this node.

        This method has the behavior described in :meth:`Node._f_getattr`.

        """

        return self._f_getattr(name)

    getAttr = previous_api(get_attr)

    def set_attr(self, name, value):
        """Set a PyTables attribute for this node.

        This method has the behavior described in :meth:`Node._f_setattr()`.

        """

        self._f_setattr(name, value)

    setAttr = previous_api(set_attr)

    def del_attr(self, name):
        """Delete a PyTables attribute from this node.

        This method has the behavior described in :meth:`Node._f_delattr`.

        """

        self._f_delattr(name)

    delAttr = previous_api(del_attr)

    # Data handling
    # `````````````
    def flush(self):
        """Flush pending data to disk.

        Saves any remaining buffered data to disk. It also releases
        I/O buffers, so if you are filling many datasets in the same
        PyTables session, please call flush() regularly to help
        PyTables keep memory requirements low.

        """

        self._g_flush()

    def _f_close(self, flush=True):
        """Close this node in the tree.

        This method has the behavior described in :meth:`Node._f_close`.
        Besides that, the optional argument flush tells whether to flush
        pending data to disk or not before closing.

        """

        if not self._v_isopen:
            return  # the node is already closed or not initialized

        # Only do a flush in case the leaf has an IO buffer.  The
        # internal buffers of HDF5 will be flushed afterwards during the
        # self._g_close() call.  Avoiding an unnecessary flush()
        # operation accelerates the closing for the unbuffered leaves.
        if flush and hasattr(self, "_v_iobuf"):
            self.flush()

        # Close the dataset and release resources
        self._g_close()

        # Close myself as a node.
        super(Leaf, self)._f_close()

    def close(self, flush=True):
        """Close this node in the tree.

        This method is completely equivalent to :meth:`Leaf._f_close`.

        """

        self._f_close(flush)
Example #22
0
                   [path for path in alivenodes],
                   lambda path: alivenodes[path])

        # Next, revive the dead nodes, close and delete them
        # so they are not placed in the limbo again.
        # These two steps ensure tables are closed *before* their indices.
        closenodes(prefix,
                   [path for path in deadnodes
                        if '/_i_' not in path],  # not indices
                   lambda path: revivenode(path))
        # Close everything else (i.e. indices)
        closenodes(prefix,
                   [path for path in deadnodes],
                   lambda path: revivenode(path))

    _g_closeDescendents = previous_api(_g_close_descendents)

    def _g_close(self):
        """Close this (open) group."""

        # hdf5extension operations:
        #   Close HDF5 group.
        self._g_close_group()

        # Close myself as a node.
        super(Group, self)._f_close()

    def _f_close(self):
        """Close this group and all its descendents.

        This method has the behavior described in :meth:`Node._f_close`.
Example #23
0
        # We need to use this strange way to obtain a dtype compliant
        # array because NumPy doesn't honor the shape of the dtype when
        # it is multidimensional.  See:
        # http://scipy.org/scipy/numpy/ticket/926
        # for details.
        # All of this is done just to take advantage of the NumPy
        # broadcasting rules.
        newshape = nparr.shape[:-len(atom.dtype.shape)]
        nparr2 = numpy.empty(newshape, dtype=[('', atom.dtype)])
        nparr2['f0'][:] = nparr
        # Return a view (i.e. get rid of the record type)
        nparr = nparr2.view(atom.dtype)
    return nparr


convertToNPAtom = previous_api(convert_to_np_atom)


# The next function is used in Array, EArray and VLArray, and it is a bit
# more high-level than convert_to_np_atom
def convert_to_np_atom2(object, atom):
    """Convert a generic object into a NumPy object compliant with atom."""

    # Check whether the object needs to be copied to make the operation
    # safe for in-place conversion.
    copy = atom.type in ['time64']
    nparr = convert_to_np_atom(object, atom, copy)
    # Finally, check the byteorder and change it if needed
    byteorder = byteorders[nparr.dtype.byteorder]
    if (byteorder in ['little', 'big'] and byteorder != sys.byteorder):
        # The byteorder needs to be fixed (a copy is made
Example #24
0
    # Now, create the new group. This works even if dstgroup == '/'
    for nodeName in dstgroup.split('/'):
        if nodeName == '':
            continue
        # First, check whether the intermediate group already exists.
        try:
            group2 = dstfileh.get_node(group, nodeName)
        except NoSuchNodeError:
            # The group does not exist. Create it.
            group2 = dstfileh.create_group(group, nodeName,
                                           title=title,
                                           filters=filters)
        group = group2
    return group

newdstGroup = previous_api(newdst_group)


def recreate_indexes(table, dstfileh, dsttable):
    listoldindexes = table._listoldindexes
    if listoldindexes != []:
        if not regoldindexes:
            if verbose:
                print("[I]Not regenerating indexes for table: '%s:%s'" %
                      (dstfileh.filename, dsttable._v_pathname))
            return
        # Now, recreate the indexed columns
        if verbose:
            print("[I]Regenerating indexes for table: '%s:%s'" %
                  (dstfileh.filename, dsttable._v_pathname))
        for colname in listoldindexes:
Example #25
0
class RootGroup(Group):

    _v_objectId = previous_api_property('_v_objectid')

    def __init__(self, ptfile, name, title, new, filters):
        mydict = self.__dict__

        # Set group attributes.
        self._v_version = obversion
        self._v_new = new
        if new:
            self._v_new_title = title
            self._v_new_filters = filters
        else:
            self._v_new_title = None
            self._v_new_filters = None

        # Set node attributes.
        self._v_file = ptfile
        self._v_isopen = True  # root is always open
        self._v_pathname = '/'
        self._v_name = '/'
        self._v_depth = 0
        self._v_max_group_width = ptfile.params['MAX_GROUP_WIDTH']
        self._v__deleting = False
        self._v_objectid = None  # later

        # Only the root node has the file as a parent.
        # Bypass __setattr__ to avoid the ``Node._v_parent`` property.
        mydict['_v_parent'] = ptfile
        ptfile._node_manager.register_node(self, '/')

        # hdf5extension operations (do before setting an AttributeSet):
        #   Update node attributes.
        self._g_new(ptfile, name, init=True)
        #   Open the node and get its object ID.
        self._v_objectid = self._g_open()

        # Set disk attributes and read children names.
        #
        # This *must* be postponed because this method needs the root node
        # to be created and bound to ``File.root``.
        # This is an exception to the rule, handled by ``File.__init__()``.
        #
        # self._g_post_init_hook()

    def _g_load_child(self, childname):
        """Load a child node from disk.

        The child node `childname` is loaded from disk and an adequate
        `Node` object is created and returned.  If there is no such
        child, a `NoSuchNodeError` is raised.

        """

        if self._v_file.root_uep != "/":
            childname = join_path(self._v_file.root_uep, childname)
        # Is the node a group or a leaf?
        node_type = self._g_check_has_child(childname)

        # Nodes that HDF5 reports as H5G_UNKNOWN
        if node_type == 'Unknown':
            return Unknown(self, childname)

        # Guess the PyTables class suited to the node,
        # build a PyTables node and return it.
        if node_type == "Group":
            if self._v_file.params['PYTABLES_SYS_ATTRS']:
                ChildClass = self._g_get_child_group_class(childname)
            else:
                # Default is a Group class
                ChildClass = Group
            return ChildClass(self, childname, new=False)
        elif node_type == "Leaf":
            ChildClass = self._g_get_child_leaf_class(childname, warn=True)
            # Building a leaf may still fail because of unsupported types
            # and other causes.
            # return ChildClass(self, childname)  # uncomment for debugging
            try:
                return ChildClass(self, childname)
            except Exception as exc:  # XXX
                warnings.warn(
                    "problems loading leaf ``%s``::\n\n"
                    "  %s\n\n"
                    "The leaf will become an ``UnImplemented`` node."
                    % (self._g_join(childname), exc))
                # If not, associate an UnImplemented object to it
                return UnImplemented(self, childname)
        elif node_type == "SoftLink":
            return SoftLink(self, childname)
        elif node_type == "ExternalLink":
            return ExternalLink(self, childname)
        else:
            return UnImplemented(self, childname)

    _g_loadChild = previous_api(_g_load_child)

    def _f_rename(self, newname):
        raise NodeError("the root node can not be renamed")

    def _f_move(self, newparent=None, newname=None, createparents=False):
        raise NodeError("the root node can not be moved")

    def _f_remove(self, recursive=False):
        raise NodeError("the root node can not be removed")
Example #26
0
        newdict = {"columns": {}, }
        if '__doc__' in classdict:
            newdict['__doc__'] = classdict['__doc__']
        for b in bases:
            if "columns" in b.__dict__:
                newdict["columns"].update(b.__dict__["columns"])
        for k in classdict:
            # if not (k.startswith('__') or k.startswith('_v_')):
            # We let _v_ variables through to configure class behaviour
            if not (k.startswith('__')):
                newdict["columns"][k] = classdict[k]

        # Return a new class with the "columns" attribute filled
        return type.__new__(cls, classname, bases, newdict)

metaIsDescription = previous_api(MetaIsDescription)


class IsDescription(object, metaclass=MetaIsDescription):
    """Description of the structure of a table or nested column.

    This class is designed to be used as an easy, yet meaningful way to
    describe the structure of new Table (see :ref:`TableClassDescr`) datasets
    or nested columns through the definition of *derived classes*. In order to
    define such a class, you must declare it as descendant of IsDescription,
    with as many attributes as columns you want in your table. The name of each
    attribute will become the name of a column, and its value will hold a
    description of it.

    Ordinary columns can be described using instances of the Col class (see
    :ref:`ColClassDescr`). Nested columns can be described by using classes
Example #27
0
class _ChildrenDict(tables.misc.proxydict.ProxyDict):
    def _get_value_from_container(self, container, key):
        return container._f_get_child(key)

    _getValueFromContainer = previous_api(_get_value_from_container)
Example #28
0
def new_node(h5file, **kwargs):
    """Creates a new file node object in the specified PyTables file object.

    Additional named arguments where and name must be passed to specify where
    the file node is to be created. Other named arguments such as title and
    filters may also be passed.

    The special named argument expectedsize, indicating an estimate of the
    file size in bytes, may also be passed.  This function returns the file
    node object.

    """

    return RAFileNode(None, h5file, **kwargs)


newNode = previous_api(new_node)
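
# A short, hypothetical round trip through this module, assuming it is
# ``tables.nodes.filenode``:
#
#     import tables
#     from tables.nodes import filenode
#
#     h5f = tables.open_file('fnode.h5', 'w')
#     fnode = filenode.new_node(h5f, where='/', name='f1')
#     fnode.write(b'hello')
#     fnode.close()
#     f = filenode.open_node(h5f.root.f1, 'r')
#     print(f.read())   # -> b'hello'
#     f.close()
#     h5f.close()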


def open_node(node, mode='r'):
    """Opens an existing file node.

    Returns a file node object from the existing specified PyTables
    node. If mode is not specified or it is 'r', the file can only be
    read, and the pointer is positioned at the beginning of the file. If
    mode is 'a+', the file can be read and appended, and the pointer is
    positioned at the end of the file.

    """

    if mode == 'r':
        return ROFileNode(node)
Example #29
0
class Group(hdf5extension.Group, Node):
    """Basic PyTables grouping structure.

    Instances of this class are grouping structures containing *child*
    instances of zero or more groups or leaves, together with
    supporting metadata. Each group has exactly one *parent* group.

    Working with groups and leaves is similar in many ways to working
    with directories and files, respectively, in a Unix filesystem.
    As with Unix directories and files, objects in the object tree are
    often described by giving their full (or absolute) path names.
    This full path can be specified either as a string (like in
    '/group1/group2') or as a complete object path written in *natural
    naming* schema (like in file.root.group1.group2).

    A collateral effect of the *natural naming* schema is that the
    names of members in the Group class and its instances must be
    carefully chosen to avoid colliding with existing child node
    names.  For this reason and to avoid polluting the children
    namespace all members in a Group start with some reserved prefix,
    like _f_ (for public methods), _g_ (for private ones), _v_ (for
    instance variables) or _c_ (for class variables). Any attempt to
    create a new child node whose name starts with one of these
    prefixes will raise a ValueError exception.

    Another effect of natural naming is that children named after
    Python keywords or having names not valid as Python identifiers
    (e.g.  class, $a or 44) cannot be accessed using the node.child
    syntax. You will be forced to use node._f_get_child(child) to
    access them (which is recommended for programmatic access).

    You will also need to use _f_get_child() to access an existing
    child node if you set a Python attribute in the Group with the
    same name as that node (you will get a NaturalNameWarning when
    doing this).

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*

    name : str
        The name of this node in its parent group.
    title
        The title for this group
    new
        If this group is new or has to be read from disk
    filters : Filters
        A Filters instance


    Notes
    -----
    The following documentation includes methods that are automatically
    called when a Group instance is accessed in a special way.

    For instance, this class defines the __setattr__, __getattr__, and
    __delattr__ methods, and they set, get and delete *ordinary Python
    attributes* as normally intended. In addition to that, __getattr__
    allows getting *child nodes* by their name for the sake of easy
    interaction on the command line, as long as there is no Python
    attribute with the same name. Groups also allow the interactive
    completion (when using readline) of the names of child nodes.
    For instance::

        # get a Python attribute
        nchild = group._v_nchildren

        # Add a Table child called 'table' under 'group'.
        h5file.create_table(group, 'table', myDescription)
        table = group.table          # get the table child instance
        group.table = 'foo'          # set a Python attribute

        # (PyTables warns you here about using the name of a child node.)
        foo = group.table            # get a Python attribute
        del group.table              # delete a Python attribute
        table = group.table          # get the table child instance again


    .. rubric:: Group attributes

    The following instance variables are provided in addition to those
    in Node (see :ref:`NodeClassDescr`):

    .. attribute:: _v_children

        Dictionary with all nodes hanging from this group.

    .. attribute:: _v_groups

        Dictionary with all groups hanging from this group.

    .. attribute:: _v_hidden

        Dictionary with all hidden nodes hanging from this group.

    .. attribute:: _v_leaves

        Dictionary with all leaves hanging from this group.

    .. attribute:: _v_links

        Dictionary with all links hanging from this group.

    .. attribute:: _v_unknown

        Dictionary with all unknown nodes hanging from this group.

    """

    # Class identifier.
    _c_classid = 'GROUP'

    _c_classId = previous_api_property('_c_classid')

    # Children containers that should be loaded only in a lazy way.
    # These are documented in the ``Group._g_add_children_names`` method.
    _c_lazy_children_attrs = (
        '__members__', '_v_children', '_v_groups', '_v_leaves',
        '_v_links', '_v_unknown', '_v_hidden')

    # `_v_nchildren` is a direct read-only shorthand
    # for the number of *visible* children in a group.
    def _g_getnchildren(self):
        return len(self._v_children)

    _v_nchildren = property(_g_getnchildren, None, None,
                            "The number of children hanging from this group.")

    # `_v_filters` is a direct read-write shorthand for the ``FILTERS``
    # attribute with the default `Filters` instance as a default value.
    def _g_getfilters(self):
        filters = getattr(self._v_attrs, 'FILTERS', None)
        if filters is None:
            filters = Filters()
        return filters

    def _g_setfilters(self, value):
        if not isinstance(value, Filters):
            raise TypeError(
                "value is not an instance of `Filters`: %r" % (value,))
        self._v_attrs.FILTERS = value

    def _g_delfilters(self):
        del self._v_attrs.FILTERS

    _v_filters = property(
        _g_getfilters, _g_setfilters, _g_delfilters,
        """Default filter properties for child nodes.

        You can (and are encouraged to) use this property to get, set and
        delete the FILTERS HDF5 attribute of the group, which stores a Filters
        instance (see :ref:`FiltersClassDescr`). When the group has no such
        attribute, a default Filters instance is used.
        """)

    _v_maxGroupWidth = previous_api_property('_v_max_group_width')

    def __init__(self, parentnode, name,
                 title="", new=False, filters=None,
                 _log=True):

        # Remember to assign these values in the root group constructor
        # if it does not use this one!

        # First, set attributes belonging to group objects.

        self._v_version = obversion
        """The object version of this group."""

        self._v_new = new
        """Is this the first time the node has been created?"""

        self._v_new_title = title
        """New title for this node."""

        self._v_new_filters = filters
        """New default filter properties for child nodes."""

        self._v_max_group_width = parentnode._v_file.params['MAX_GROUP_WIDTH']
        """Maximum number of children on each group before warning the user.

        .. versionchanged:: 3.0
           The *_v_maxGroupWidth* attribute has been renamed into
           *_v_max_group_width*.

        """

        # Finally, set up this object as a node.
        super(Group, self).__init__(parentnode, name, _log)

    def _g_post_init_hook(self):
        if self._v_new:
            if self._v_file.params['PYTABLES_SYS_ATTRS']:
                # Save some attributes for the new group on disk.
                set_attr = self._v_attrs._g__setattr
                # Set the title, class and version attributes.
                set_attr('TITLE', self._v_new_title)
                set_attr('CLASS', self._c_classid)
                set_attr('VERSION', self._v_version)

                # Set the default filter properties.
                newfilters = self._v_new_filters
                if newfilters is None:
                    # If no filters have been passed in the constructor,
                    # inherit them from the parent group, but only if they
                    # have been inherited or explicitly set.
                    newfilters = getattr(
                        self._v_parent._v_attrs, 'FILTERS', None)
                if newfilters is not None:
                    set_attr('FILTERS', newfilters)
        else:
            # If the file has PyTables format, get the VERSION attr
            if 'VERSION' in self._v_attrs._v_attrnamessys:
                self._v_version = self._v_attrs.VERSION
            else:
                self._v_version = "0.0 (unknown)"
            # We don't need to get more attributes from disk,
            # since the most important ones are defined as properties.

    _g_postInitHook = previous_api(_g_post_init_hook)
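
    # After creation (and with the ``PYTABLES_SYS_ATTRS`` parameter
    # enabled), the system attributes written above can be read back
    # through ``_v_attrs``; a hedged sketch with a hypothetical group:
    #
    #     group._v_attrs.TITLE    # title passed at creation time
    #     group._v_attrs.CLASS    # 'GROUP'
    #     group._v_attrs.VERSION  # object format version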

    def __del__(self):
        if (self._v_isopen and
            self._v_pathname in self._v_file._node_manager.registry and
                '_v_children' in self.__dict__):
            # The group is going to be killed.  Rebuild weak references
            # (that Python cancelled just before calling this method) so
            # that they are still usable if the object is revived later.
            selfref = weakref.ref(self)
            self._v_children.containerref = selfref
            self._v_groups.containerref = selfref
            self._v_leaves.containerref = selfref
            self._v_links.containerref = selfref
            self._v_unknown.containerref = selfref
            self._v_hidden.containerref = selfref

        super(Group, self).__del__()

    def _g_get_child_group_class(self, childname):
        """Get the class of a not-yet-loaded group child.

        `childname` must be the name of a *group* child.

        """

        childCID = self._g_get_gchild_attr(childname, 'CLASS')
        if childCID is not None and not isinstance(childCID, str):
            childCID = childCID.decode('utf-8')

        if childCID in class_id_dict:
            return class_id_dict[childCID]  # look up group class
        else:
            return Group  # default group class

    _g_getChildGroupClass = previous_api(_g_get_child_group_class)

    def _g_get_child_leaf_class(self, childname, warn=True):
        """Get the class of a not-yet-loaded leaf child.

        `childname` must be the name of a *leaf* child.  If the child
        belongs to an unknown kind of leaf, or if its kind can not be
        guessed, `UnImplemented` will be returned and a warning will be
        issued if `warn` is true.

        """

        if self._v_file.params['PYTABLES_SYS_ATTRS']:
            childCID = self._g_get_lchild_attr(childname, 'CLASS')
            if childCID is not None and not isinstance(childCID, str):
                childCID = childCID.decode('utf-8')
        else:
            childCID = None

        if childCID in class_id_dict:
            return class_id_dict[childCID]  # look up leaf class
        else:
            # Unknown or no ``CLASS`` attribute, try a guess.
            childCID2 = utilsextension.which_class(self._v_objectid, childname)
            if childCID2 == 'UNSUPPORTED':
                if warn:
                    if childCID is None:
                        warnings.warn(
                            "leaf ``%s`` is of an unsupported type; "
                            "it will become an ``UnImplemented`` node"
                            % self._g_join(childname))
                    else:
                        warnings.warn(
                            ("leaf ``%s`` has an unknown class ID ``%s``; "
                             "it will become an ``UnImplemented`` node")
                            % (self._g_join(childname), childCID))
                return UnImplemented
            assert childCID2 in class_id_dict
            return class_id_dict[childCID2]  # look up leaf class

    _g_getChildLeafClass = previous_api(_g_get_child_leaf_class)

    def _g_add_children_names(self):
        """Add children names to this group taking into account their
        visibility and kind."""

        mydict = self.__dict__

        # The names of the lazy attributes
        mydict['__members__'] = members = []
        """The names of visible children nodes for readline-style completion.
        """
        mydict['_v_children'] = children = _ChildrenDict(self)
        """The number of children hanging from this group."""
        mydict['_v_groups'] = groups = _ChildrenDict(self)
        """Dictionary with all groups hanging from this group."""
        mydict['_v_leaves'] = leaves = _ChildrenDict(self)
        """Dictionary with all leaves hanging from this group."""
        mydict['_v_links'] = links = _ChildrenDict(self)
        """Dictionary with all links hanging from this group."""
        mydict['_v_unknown'] = unknown = _ChildrenDict(self)
        """Dictionary with all unknown nodes hanging from this group."""
        mydict['_v_hidden'] = hidden = _ChildrenDict(self)
        """Dictionary with all hidden nodes hanging from this group."""

        # Get the names of *all* child groups and leaves.
        (group_names, leaf_names, link_names, unknown_names) = \
            self._g_list_group(self._v_parent)

        # Separate groups into visible groups and hidden nodes,
        # and leaves into visible leaves and hidden nodes.
        for (childnames, childdict) in ((group_names, groups),
                                        (leaf_names, leaves),
                                        (link_names, links),
                                        (unknown_names, unknown)):

            for childname in childnames:
                # See whether the name implies that the node is hidden.
                # (Assigned values are entirely irrelevant.)
                if isvisiblename(childname):
                    # Visible node.
                    members.insert(0, childname)
                    children[childname] = None
                    childdict[childname] = None
                else:
                    # Hidden node.
                    hidden[childname] = None

    _g_addChildrenNames = previous_api(_g_add_children_names)
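
    # The lazy dictionaries built above drive most child lookups; a
    # short sketch (hypothetical group) of how they partition children:
    #
    #     list(group._v_children)  # all visible children, any kind
    #     list(group._v_groups)    # visible child groups only
    #     list(group._v_leaves)    # visible child leaves only
    #     list(group._v_hidden)    # children with reserved (hidden) names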

    def _g_check_has_child(self, name):
        """Check whether 'name' is a children of 'self' and return its type."""

        # Get the HDF5 name matching the PyTables name.
        node_type = self._g_get_objinfo(name)
        if node_type == "NoSuchNode":
            raise NoSuchNodeError(
                "group ``%s`` does not have a child named ``%s``"
                % (self._v_pathname, name))
        return node_type

    _g_checkHasChild = previous_api(_g_check_has_child)

    def __iter__(self):
        """Iterate over the child nodes hanging directly from the group.

        This iterator is *not* recursive.

        Examples
        --------

        ::

            # Non-recursively list all the nodes hanging from '/detector'
            print("Nodes in '/detector' group:")
            for node in h5file.root.detector:
                print(node)

        """

        return self._f_iter_nodes()

    def __contains__(self, name):
        """Is there a child with that `name`?

        Returns a true value if the group has a child node (visible or
        hidden) with the given `name` (a string), false otherwise.

        """

        self._g_check_open()
        try:
            self._g_check_has_child(name)
        except NoSuchNodeError:
            return False
        return True
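
    # A hedged sketch of the membership test above (hypothetical
    # names); note that hidden children also count as members:
    #
    #     if 'detector' in h5file.root:
    #         node = h5file.root._f_get_child('detector')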

    def _f_walknodes(self, classname=None):
        """Iterate over descendant nodes.

        This method recursively walks *self* top to bottom (preorder),
        iterating over child groups in alphanumerical order, and yielding
        nodes.  If classname is supplied, only instances of the named class are
        yielded.

        If *classname* is Group, it behaves like :meth:`Group._f_walk_groups`,
        yielding only groups.  If you don't want a recursive behavior,
        use :meth:`Group._f_iter_nodes` instead.

        Examples
        --------

        ::

            # Recursively print all the arrays hanging from '/'
            print("Arrays in the object tree '/':")
            for array in h5file.root._f_walknodes('Array'):
                print(array)

        """

        self._g_check_open()

        # For compatibility with old default arguments.
        if classname == '':
            classname = None

        if classname == "Group":
            # Recursive algorithm
            for group in self._f_walk_groups():
                yield group
        else:
            for group in self._f_walk_groups():
                for leaf in group._f_iter_nodes(classname):
                    yield leaf

    _f_walkNodes = previous_api(_f_walknodes)

    def _g_join(self, name):
        """Helper method to correctly concatenate a name child object with the
        pathname of this group."""

        if name == "/":
            # This case can happen when doing copies
            return self._v_pathname
        return join_path(self._v_pathname, name)

    def _g_width_warning(self):
        """Issue a :exc:`PerformanceWarning` on too many children."""

        warnings.warn("""\
group ``%s`` is exceeding the recommended maximum number of children (%d); \
be ready to see PyTables asking for *lots* of memory and possibly slow I/O."""
                      % (self._v_pathname, self._v_max_group_width),
                      PerformanceWarning)

    _g_widthWarning = previous_api(_g_width_warning)

    def _g_refnode(self, childnode, childname, validate=True):
        """Insert references to a `childnode` via a `childname`.

        Checks that the `childname` is valid and does not exist, then
        creates references to the given `childnode` by that `childname`.
        The validation of the name can be omitted by setting `validate`
        to a false value (this may be useful for adding already existing
        nodes to the tree).

        """

        # Check for name validity.
        if validate:
            check_name_validity(childname)
            childnode._g_check_name(childname)

        # Check if there is already a child with the same name.
        # This can be triggered because of the user
        # (via node construction or renaming/movement).
        # Links are not checked here because they are copied and referenced
        # using ``File.get_node`` so they already exist in `self`.
        if (not isinstance(childnode, Link)) and childname in self:
            raise NodeError(
                "group ``%s`` already has a child node named ``%s``"
                % (self._v_pathname, childname))

        # Show a warning if there is an object attribute with that name.
        if childname in self.__dict__:
            warnings.warn(
                "group ``%s`` already has an attribute named ``%s``; "
                "you will not be able to use natural naming "
                "to access the child node"
                % (self._v_pathname, childname), NaturalNameWarning)

        # Check group width limits.
        if (len(self._v_children) + len(self._v_hidden) >=
                self._v_max_group_width):
            self._g_width_warning()

        # Update members information.
        # Insert references to the new child.
        # (Assigned values are entirely irrelevant.)
        if isvisiblename(childname):
            # Visible node.
            self.__members__.insert(0, childname)  # enable completion
            self._v_children[childname] = None  # insert node
            if isinstance(childnode, Unknown):
                self._v_unknown[childname] = None
            elif isinstance(childnode, Link):
                self._v_links[childname] = None
            elif isinstance(childnode, Leaf):
                self._v_leaves[childname] = None
            elif isinstance(childnode, Group):
                self._v_groups[childname] = None
        else:
            # Hidden node.
            self._v_hidden[childname] = None  # insert node

    _g_refNode = previous_api(_g_refnode)

    def _g_unrefnode(self, childname):
        """Remove references to a node.

        Removes all references to the named node.

        """

        # This can *not* be triggered because of the user.
        assert childname in self, \
            ("group ``%s`` does not have a child node named ``%s``"
                % (self._v_pathname, childname))

        # Update members information, if needed
        if '_v_children' in self.__dict__:
            if childname in self._v_children:
                # Visible node.
                members = self.__members__
                member_index = members.index(childname)
                del members[member_index]  # disables completion

                del self._v_children[childname]  # remove node
                self._v_unknown.pop(childname, None)
                self._v_links.pop(childname, None)
                self._v_leaves.pop(childname, None)
                self._v_groups.pop(childname, None)
            else:
                # Hidden node.
                del self._v_hidden[childname]  # remove node

    _g_unrefNode = previous_api(_g_unrefnode)

    def _g_move(self, newparent, newname):
        # Move the node to the new location.
        oldpath = self._v_pathname
        super(Group, self)._g_move(newparent, newname)
        newpath = self._v_pathname

        # Update location information in children.  This node shouldn't
        # be affected since it has already been relocated.
        self._v_file._update_node_locations(oldpath, newpath)

    def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs):
        # Compute default arguments.
        title = kwargs.get('title', self._v_title)
        filters = kwargs.get('filters', None)
        stats = kwargs.get('stats', None)

        # Fix arguments with explicit None values for backwards compatibility.
        if title is None:
            title = self._v_title
        # If no filters have been passed to the call, copy them from the
        # source group, but only if inherited or explicitly set.
        if filters is None:
            filters = getattr(self._v_attrs, 'FILTERS', None)

        # Create a copy of the object.
        new_node = Group(newparent, newname,
                         title, new=True, filters=filters, _log=_log)

        # Copy user attributes if needed.
        if kwargs.get('copyuserattrs', True):
            self._v_attrs._g_copy(new_node._v_attrs, copyclass=True)

        # Update statistics if needed.
        if stats is not None:
            stats['groups'] += 1

        if recursive:
            # Copy child nodes if a recursive copy was requested.
            # Some arguments should *not* be passed to children copy ops.
            kwargs = kwargs.copy()
            kwargs.pop('title', None)
            self._g_copy_children(new_node, **kwargs)

        return new_node

    def _g_copy_children(self, newparent, **kwargs):
        """Copy child nodes.

        Copies all nodes descending from this one into the specified
        `newparent`.  If the new parent has a child node with the same
        name as one of the nodes in this group, the copy fails with a
        `NodeError`, maybe resulting in a partial copy.  Nothing is
        logged.

        """

        # Recursive version of children copy.
        # for srcchild in self._v_children.itervalues():
        ##    srcchild._g_copy_as_child(newparent, **kwargs)

        # Non-recursive version of children copy.
        use_hardlinks = kwargs.get('use_hardlinks', False)
        if use_hardlinks:
            address_map = kwargs.setdefault('address_map', {})

        parentstack = [(self, newparent)]  # [(source, destination), ...]
        while parentstack:
            (srcparent, dstparent) = parentstack.pop()

            if use_hardlinks:
                for srcchild in srcparent._v_children.itervalues():
                    addr, rc = srcchild._get_obj_info()
                    if rc > 1 and addr in address_map:
                        where, name = address_map[addr][0]
                        localsrc = os.path.join(where, name)
                        dstparent._v_file.create_hard_link(dstparent,
                                                           srcchild.name,
                                                           localsrc)
                        address_map[addr].append(
                            (dstparent._v_pathname, srcchild.name)
                        )

                        # Update statistics if needed (``get`` instead of
                        # ``pop`` so later copies in this loop still see it).
                        stats = kwargs.get('stats', None)
                        if stats is not None:
                            stats['hardlinks'] += 1
                    else:
                        dstchild = srcchild._g_copy_as_child(dstparent,
                                                             **kwargs)
                        if isinstance(srcchild, Group):
                            parentstack.append((srcchild, dstchild))

                        if rc > 1:
                            address_map[addr] = [
                                (dstparent._v_pathname, srcchild.name)
                            ]
            else:
                for srcchild in srcparent._v_children.itervalues():
                    dstchild = srcchild._g_copy_as_child(dstparent, **kwargs)
                    if isinstance(srcchild, Group):
                        parentstack.append((srcchild, dstchild))

    _g_copyChildren = previous_api(_g_copy_children)
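
    # The ``use_hardlinks`` branch above keeps an ``address_map`` from
    # HDF5 object addresses to already-copied (path, name) pairs, so
    # nodes with a reference count > 1 are recreated as hard links
    # instead of being duplicated.  A sketch of the map's shape
    # (hypothetical paths):
    #
    #     address_map = {addr: [('/dst/group', 'node1'), ...]}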

    def _f_get_child(self, childname):
        """Get the child called childname of this group.

        If the child exists (be it visible or not), it is returned.  Else, a
        NoSuchNodeError is raised.

        Using this method is recommended over getattr() when accessing
        children programmatically, i.e. when childname is not known
        beforehand or is not a valid Python identifier.

        """

        self._g_check_open()

        self._g_check_has_child(childname)

        childpath = join_path(self._v_pathname, childname)
        return self._v_file._get_node(childpath)

    _f_getChild = previous_api(_f_get_child)
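
    # A short sketch of why ``_f_get_child`` is preferred for
    # programmatic access (hypothetical child name that is not a valid
    # Python identifier, so getattr() could not reach it):
    #
    #     node = group._f_get_child('2019-data')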

    def _f_list_nodes(self, classname=None):
        """Return a *list* with children nodes.

        This is a list-returning version of :meth:`Group._f_iter_nodes()`.

        """

        return list(self._f_iter_nodes(classname))

    _f_listNodes = previous_api(_f_list_nodes)

    def _f_iter_nodes(self, classname=None):
        """Iterate over children nodes.

        Child nodes are yielded alphanumerically sorted by node name.  If the
        name of a class derived from Node (see :ref:`NodeClassDescr`) is
        supplied in the classname parameter, only instances of that class (or
        subclasses of it) will be returned.

        This is an iterator version of :meth:`Group._f_list_nodes`.

        """

        self._g_check_open()

        if not classname:
            # Returns all the children alphanumerically sorted
            names = sorted(self._v_children.iterkeys())
            for name in names:
                yield self._v_children[name]
        elif classname == 'Group':
            # Returns all the groups alphanumerically sorted
            names = sorted(self._v_groups.iterkeys())
            for name in names:
                yield self._v_groups[name]
        elif classname == 'Leaf':
            # Returns all the leaves alphanumerically sorted
            names = sorted(self._v_leaves.iterkeys())
            for name in names:
                yield self._v_leaves[name]
        elif classname == 'Link':
            # Returns all the links alphanumerically sorted
            names = sorted(self._v_links.iterkeys())
            for name in names:
                yield self._v_links[name]
        elif classname == 'IndexArray':
            raise TypeError(
                "listing ``IndexArray`` nodes is not allowed")
        else:
            class_ = get_class_by_name(classname)

            children = self._v_children
            childnames = sorted(children.iterkeys())

            for childname in childnames:
                childnode = children[childname]
                if isinstance(childnode, class_):
                    yield childnode

    _f_iterNodes = previous_api(_f_iter_nodes)
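
    # A hedged sketch of class-filtered iteration (hypothetical file):
    #
    #     for table in h5file.root._f_iter_nodes(classname='Table'):
    #         print(table._v_pathname)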

    def _f_walk_groups(self):
        """Recursively iterate over descendent groups (not leaves).

        This method starts by yielding *self*, and then it goes on to
        recursively iterate over all child groups in alphanumerical order, top
        to bottom (preorder), following the same procedure.

        """

        self._g_check_open()

        stack = [self]
        yield self
        # Iterate over the descendants
        while stack:
            objgroup = stack.pop()
            groupnames = sorted(objgroup._v_groups.iterkeys())
            # Deliver the child groups in alphanumerical order so that
            # the traversal order is deterministic.
            for groupname in groupnames:
                stack.append(objgroup._v_groups[groupname])
                yield objgroup._v_groups[groupname]

    _f_walkGroups = previous_api(_f_walk_groups)
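
    # A minimal sketch of the traversal above (hypothetical file); the
    # first group yielded is always the starting group itself:
    #
    #     for group in h5file.root._f_walk_groups():
    #         print(group._v_pathname)  # '/', then descendant groups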

    def __delattr__(self, name):
        """Delete a Python attribute called name.

        This method deletes an *ordinary Python attribute* from the object.
        It does *not* remove children nodes from this group; for that,
        use :meth:`File.remove_node` or :meth:`Node._f_remove`.
        It does *not* delete a PyTables node attribute either; for that,
        use :meth:`File.del_node_attr`, :meth:`Node._f_delattr` or
        :attr:`Node._v_attrs`.

        If there is an attribute and a child node with the same name,
        the child node will be made accessible again via natural naming.

        """

        try:
            super(Group, self).__delattr__(name)  # nothing particular
        except AttributeError as ae:
            hint = " (use ``node._f_remove()`` if you want to remove a node)"
            raise ae.__class__(str(ae) + hint)

    def __getattr__(self, name):
        """Get a Python attribute or child node called name.

        If the object has a Python attribute called name, its value is
        returned. Else, if the node has a child node called name, it is
        returned.  Else, an AttributeError is raised.

        """

        # That is true since a `NoSuchNodeError` is an `AttributeError`.
        mydict = self.__dict__
        if name in mydict:
            return mydict[name]
        elif name in self._c_lazy_children_attrs:
            self._g_add_children_names()
            return mydict[name]
        return self._f_get_child(name)

    def __setattr__(self, name, value):
        """Set a Python attribute called name with the given value.

        This method stores an *ordinary Python attribute* in the object. It
        does *not* store new children nodes under this group; for that, use the
        File.create*() methods (see the File class
        in :ref:`FileClassDescr`). It does *not* store a PyTables node
        attribute either; for that, use :meth:`File.set_node_attr`,
        :meth:`Node._f_setattr` or :attr:`Node._v_attrs`.

        If there is already a child node with the same name, a
        NaturalNameWarning will be issued and the child node will not be
        accessible via natural naming nor getattr(). It will still be available
        via :meth:`File.get_node`, :meth:`Group._f_get_child` and children
        dictionaries in the group (if visible).

        """

        # Show a warning if there is a child node with that name.
        #
        # ..note::
        #
        #   Using ``if name in self:`` is not right since that would
        #   require ``_v_children`` and ``_v_hidden`` to be already set
        #   when the very first attribute assignments are made.
        #   Moreover, this warning is only concerned about clashes with
        #   names used in natural naming, i.e. those in ``__members__``.
        #
        # ..note::
        #
        #   The check ``'__members__' in mydict`` allows attribute
        #   assignment to happen before calling `Group.__init__()`, by
        #   avoiding a lookup of the not-yet-assigned ``__members__``
        #   attribute.  This allows subclasses to set up some attributes
        #   and then call the constructor of the superclass.  If the
        #   check above is disabled, that results in Python entering an
        #   endless loop on exit!

        mydict = self.__dict__
        if '__members__' in mydict and name in self.__members__:
            warnings.warn(
                "group ``%s`` already has a child node named ``%s``; "
                "you will not be able to use natural naming "
                "to access the child node"
                % (self._v_pathname, name), NaturalNameWarning)

        super(Group, self).__setattr__(name, value)
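
    # A hedged sketch of the name-clash behavior described in the
    # docstring (hypothetical names):
    #
    #     h5file.create_array(group, 'data', [1, 2, 3])
    #     group.data = 'oops'          # -> NaturalNameWarning
    #     group.data                   # plain attribute shadows the node
    #     group._f_get_child('data')   # the Array is still reachable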

    def _f_flush(self):
        """Flush this Group."""

        self._g_check_open()
        self._g_flush_group()

    def _g_close_descendents(self):
        """Close all the *loaded* descendent nodes of this group."""

        node_manager = self._v_file._node_manager
        node_manager.close_subtree(self._v_pathname)

    _g_closeDescendents = previous_api(_g_close_descendents)

    def _g_close(self):
        """Close this (open) group."""

        if self._v_isopen:
            # hdf5extension operations:
            #   Close HDF5 group.
            self._g_close_group()

        # Close myself as a node.
        super(Group, self)._f_close()

    def _f_close(self):
        """Close this group and all its descendents.

        This method has the behavior described in :meth:`Node._f_close`.
        It should be noted that this operation closes all the nodes
        descending from this group.

        You should not need to close nodes manually because they are
        automatically opened/closed when they are loaded/evicted from
        the integrated LRU cache.

        """

        # If the group is already closed, return immediately
        if not self._v_isopen:
            return

        # First, close all the descendents of this group, unless a) the
        # group is being deleted (evicted from LRU cache) or b) the node
        # is being closed during an aborted creation, in which cases
        # this is not an explicit close issued by the user.
        if not (self._v__deleting or self._v_objectid is None):
            self._g_close_descendents()

        # When all the descendents have been closed, close this group.
        # This is done at the end because some nodes may still need to
        # be loaded during the closing process; thus this node must be
        # open until the very end.
        self._g_close()

    def _g_remove(self, recursive=False, force=False):
        """Remove (recursively if needed) the Group.

        This version correctly handles both visible and hidden nodes.

        """

        if self._v_nchildren > 0:
            if not (recursive or force):
                raise NodeError("group ``%s`` has child nodes; "
                                "please set `recursive` or `force` to true "
                                "to remove it"
                                % (self._v_pathname,))

            # First close all the descendents hanging from this group,
            # so that it is not possible to use a node that no longer exists.
            self._g_close_descendents()

        # Remove the node itself from the hierarchy.
        super(Group, self)._g_remove(recursive, force)

    def _f_copy(self, newparent=None, newname=None,
                overwrite=False, recursive=False, createparents=False,
                **kwargs):
        """Copy this node and return the new one.

        This method has the behavior described in :meth:`Node._f_copy`.
        In addition, it recognizes the following keyword arguments:

        Parameters
        ----------
        title
            The new title for the destination. If omitted or None, the
            original title is used. This only applies to the topmost
            node in recursive copies.
        filters : Filters
            Specifying this parameter overrides the original filter
            properties in the source node. If specified, it must be an
            instance of the Filters class (see :ref:`FiltersClassDescr`).
            The default is to copy the filter properties from the source
            node.
        copyuserattrs
            You can prevent the user attributes from being copied by setting
            this parameter to False. The default is to copy them.
        stats
            This argument may be used to collect statistics on the copy
            process. When used, it should be a dictionary with keys 'groups',
            'leaves', 'links' and 'bytes' having a numeric value. Their values
            will be incremented to reflect the number of groups, leaves and
            bytes, respectively, that have been copied during the operation.

        """

        return super(Group, self)._f_copy(
            newparent, newname,
            overwrite, recursive, createparents, **kwargs)
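
    # A minimal sketch of collecting copy statistics through the
    # ``stats`` argument described above (hypothetical destination):
    #
    #     stats = {'groups': 0, 'leaves': 0, 'links': 0, 'bytes': 0}
    #     group._f_copy('/backup', recursive=True, createparents=True,
    #                   stats=stats)
    #     print(stats)  # updated counts for the whole copy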

    def _f_copy_children(self, dstgroup, overwrite=False, recursive=False,
                         createparents=False, **kwargs):
        """Copy the children of this group into another group.

        Children hanging directly from this group are copied into dstgroup,
        which can be a Group (see :ref:`GroupClassDescr`) object or its
        pathname in string form. If createparents is true, the needed groups
        for the given destination group path to exist will be created.

        The operation will fail with a NodeError if there is a child node
        in the destination group with the same name as one of the copied
        children from this one, unless overwrite is true; in this case,
        the former child node is recursively removed before copying the
        latter.

        By default, nodes descending from children groups of this node
        are not copied. If the recursive argument is true, all descendant
        nodes of this node are recursively copied.

        Additional keyword arguments may be passed to customize the
        copying process. For instance, title and filters may be changed,
        user attributes may be or may not be copied, data may be sub-sampled,
        stats may be collected, etc. Arguments unknown to nodes are simply
        ignored. Check the documentation for copying operations of nodes to
        see which options they support.

        """

        self._g_check_open()

        # `dstgroup` is used instead of its path to avoid accepting
        # `Node` objects when `createparents` is true.  Also, note that
        # there is no risk of creating parent nodes and failing later
        # because of destination nodes already existing.
        dstparent = self._v_file._get_or_create_path(dstgroup, createparents)
        self._g_check_group(dstparent)  # Is it a group?

        if not overwrite:
            # Abort as early as possible when destination nodes exist
            # and overwriting is not enabled.
            for childname in self._v_children:
                if childname in dstparent:
                    raise NodeError(
                        "destination group ``%s`` already has "
                        "a node named ``%s``; "
                        "you may want to use the ``overwrite`` argument"
                        % (dstparent._v_pathname, childname))

        use_hardlinks = kwargs.get('use_hardlinks', False)
        if use_hardlinks:
            address_map = kwargs.setdefault('address_map', {})

            for child in self._v_children.itervalues():
                addr, rc = child._get_obj_info()
                if rc > 1 and addr in address_map:
                    where, name = address_map[addr][0]
                    localsrc = os.path.join(where, name)
                    dstparent._v_file.create_hard_link(dstparent, child.name,
                                                       localsrc)
                    address_map[addr].append(
                        (dstparent._v_pathname, child.name)
                    )

                    # Update statistics if needed (``get`` instead of
                    # ``pop`` so later copies in this loop still see it).
                    stats = kwargs.get('stats', None)
                    if stats is not None:
                        stats['hardlinks'] += 1
                else:
                    child._f_copy(dstparent, None, overwrite, recursive,
                                  **kwargs)
                    if rc > 1:
                        address_map[addr] = [
                            (dstparent._v_pathname, child.name)
                        ]
        else:
            for child in self._v_children.itervalues():
                child._f_copy(dstparent, None, overwrite, recursive, **kwargs)

    _f_copyChildren = previous_api(_f_copy_children)
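
    # A hedged usage sketch (hypothetical paths), including the
    # ``use_hardlinks`` keyword consumed above:
    #
    #     h5file.root.src._f_copy_children('/dst', recursive=True,
    #                                      createparents=True,
    #                                      use_hardlinks=True)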

    def __str__(self):
        """Return a short string representation of the group.

        Examples
        --------

        ::

            >>> f = tables.open_file('data/test.h5')
            >>> print(f.root.group0)
            /group0 (Group) 'First Group'

        """

        pathname = self._v_pathname
        classname = self.__class__.__name__
        title = self._v_title
        return "%s (%s) %r" % (pathname, classname, title)

    def __repr__(self):
        """Return a detailed string representation of the group.

        Examples
        --------

        ::

            >>> f = tables.open_file('data/test.h5')
            >>> f.root.group0
            /group0 (Group) 'First Group'
              children := ['tuple1' (Table), 'group1' (Group)]

        """

        rep = [
            '%r (%s)' % (childname, child.__class__.__name__)
            for (childname, child) in self._v_children.iteritems()
        ]
        childlist = '[%s]' % (', '.join(rep))

        return "%s\n  children := %s" % (str(self), childlist)
Example #30
    if keyword.iskeyword(name):
        warnings.warn("object name is a Python keyword: %r; %s"
                      % (name, warnInfo), NaturalNameWarning)
        return

    # Still, names starting with reserved prefixes are not allowed.
    if _reserved_id_re.match(name):
        raise ValueError("object name starts with a reserved prefix: %r; "
                         "it matches the pattern ``%s``"
                         % (name, _reserved_id_re.pattern))

    # ``__members__`` matches no reserved prefix, but it is still
    # disallowed because PyTables uses it for readline-style completion.
    if name == '__members__':
        raise ValueError("``__members__`` is not allowed as an object name")

checkNameValidity = previous_api(check_name_validity)
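
# A hedged sketch of the checks in this fragment (outcomes follow the
# code above; ``_v_`` is assumed to be one of the reserved prefixes):
#
#     check_name_validity('mytable')   # passes silently
#     check_name_validity('class')     # Python keyword -> warning only
#     check_name_validity('_v_foo')    # reserved prefix -> ValueError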


def join_path(parentpath, name):
    """Join a *canonical* `parentpath` with a *non-empty* `name`.

    .. versionchanged:: 3.0
       The *parentPath* parameter has been renamed into *parentpath*.

    >>> join_path('/', 'foo')
    '/foo'
    >>> join_path('/foo', 'bar')
    '/foo/bar'
    >>> join_path('/foo', '/foo2/bar')
    '/foo/foo2/bar'
    >>> join_path('/foo', '/')
Example #31
class Enum(object):
    """Enumerated type.

    Each instance of this class represents an enumerated type. The
    values of the type must be declared *exhaustively* and named with
    *strings*, and they might be given explicit concrete values, though
    this is not compulsory. Once the type is defined, it can not be
    modified.

    There are three ways of defining an enumerated type. Each one of
    them corresponds to the type of the only argument in the
    constructor of Enum:

    - *Sequence of names*: each enumerated value is named using a
      string, and its order is determined by its position in the
      sequence; the concrete value is assigned automatically::

          >>> boolEnum = Enum(['True', 'False'])

    - *Mapping of names*: each enumerated value is named by a string
      and given an explicit concrete value. All of the concrete values
      must be different, or a ValueError will be raised::

          >>> priority = Enum({'red': 20, 'orange': 10, 'green': 0})
          >>> colors = Enum({'red': 1, 'blue': 1})
          Traceback (most recent call last):
          ...
          ValueError: enumerated values contain duplicate concrete values: 1

    - *Enumerated type*: in that case, a copy of the original
      enumerated type is created. Both enumerated types are considered
      equal::

          >>> prio2 = Enum(priority)
          >>> priority == prio2
          True

    Please note that names starting with _ are not allowed, since they
    are reserved for internal usage::

        >>> prio2 = Enum(['_xx'])
        Traceback (most recent call last):
        ...
        ValueError: name of enumerated value can not start with ``_``: '_xx'

    The concrete value of an enumerated value is obtained by getting
    its name as an attribute of the Enum instance (see __getattr__())
    or as an item (see __getitem__()). This allows comparisons between
    enumerated values and assigning them to ordinary Python variables::

        >>> redv = priority.red
        >>> redv == priority['red']
        True
        >>> redv > priority.green
        True
        >>> priority.red == priority.orange
        False

    The name of the enumerated value corresponding to a concrete value
    can also be obtained by using the __call__() method of the
    enumerated type. In this way you get the symbolic name to use it
    later with __getitem__()::

        >>> priority(redv)
        'red'
        >>> priority.red == priority[priority(priority.red)]
        True

    (If you ask, the __getitem__() method is not used for this purpose
    to avoid ambiguity in the case of using strings as concrete
    values.)

    """
    def __init__(self, enum):
        mydict = self.__dict__

        mydict['_names'] = {}
        mydict['_values'] = {}

        if isinstance(enum, list) or isinstance(enum, tuple):
            for (value, name) in enumerate(enum):  # values become 0, 1, 2...
                self._check_and_set_pair(name, value)
        elif isinstance(enum, dict):
            for (name, value) in enum.iteritems():
                self._check_and_set_pair(name, value)
        elif isinstance(enum, Enum):
            for (name, value) in enum._names.iteritems():
                self._check_and_set_pair(name, value)
        else:
            raise TypeError("""\
enumerations can only be created from \
sequences, mappings and other enumerations""")

    def _check_and_set_pair(self, name, value):
        """Check validity of enumerated value and insert it into type."""

        names = self._names
        values = self._values

        if not isinstance(name, basestring):
            raise TypeError("name of enumerated value is not a string: %r" %
                            (name, ))
        if name.startswith('_'):
            raise ValueError(
                "name of enumerated value can not start with ``_``: %r" % name)
        # This check is only necessary with a sequence base object.
        if name in names:
            raise ValueError("enumerated values contain duplicate names: %r" %
                             name)
        # This check is only necessary with a mapping base object.
        if value in values:
            raise ValueError(
                "enumerated values contain duplicate concrete values: %r" %
                value)

        names[name] = value
        values[value] = name
        self.__dict__[name] = value

    _checkAndSetPair = previous_api(_check_and_set_pair)

    def __getitem__(self, name):
        """Get the concrete value of the enumerated value with that name.

        The name of the enumerated value must be a string. If there is no value
        with that name in the enumeration, a KeyError is raised.

        Examples
        --------

        Let ``enum`` be an enumerated type defined as:

        >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5})

        then:

        >>> enum['T1']
        2
        >>> enum['foo']
        Traceback (most recent call last):
          ...
        KeyError: "no enumerated value with that name: 'foo'"

        """

        try:
            return self._names[name]
        except KeyError:
            raise KeyError("no enumerated value with that name: %r" % (name, ))

    def __setitem__(self, name, value):
        """This operation is forbidden."""
        raise IndexError("operation not allowed")

    def __delitem__(self, name):
        """This operation is forbidden."""
        raise IndexError("operation not allowed")

    def __getattr__(self, name):
        """Get the concrete value of the enumerated value with that name.

        The name of the enumerated value must be a string. If there is no value
        with that name in the enumeration, an AttributeError is raised.

        Examples
        --------
        Let ``enum`` be an enumerated type defined as:

        >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5})

        then:

        >>> enum.T1
        2
        >>> enum.foo
        Traceback (most recent call last):
          ...
        AttributeError: no enumerated value with that name: 'foo'

        """

        try:
            return self[name]
        except KeyError as ke:
            raise AttributeError(*ke.args)

    def __setattr__(self, name, value):
        """This operation is forbidden."""
        raise AttributeError("operation not allowed")

    def __delattr__(self, name):
        """This operation is forbidden."""
        raise AttributeError("operation not allowed")

    def __contains__(self, name):
        """Is there an enumerated value with that name in the type?

        If the enumerated type has an enumerated value with that name, True is
        returned.  Otherwise, False is returned. The name must be a string.

        This method does *not* check for concrete values matching a value in an
        enumerated type. For that, please use the :meth:`Enum.__call__` method.

        Examples
        --------
        Let ``enum`` be an enumerated type defined as:

        >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5})

        then:

        >>> 'T1' in enum
        True
        >>> 'foo' in enum
        False
        >>> 0 in enum
        Traceback (most recent call last):
          ...
        TypeError: name of enumerated value is not a string: 0
        >>> enum.T1 in enum  # Be careful with this!
        Traceback (most recent call last):
          ...
        TypeError: name of enumerated value is not a string: 2

        """

        if not isinstance(name, basestring):
            raise TypeError("name of enumerated value is not a string: %r" %
                            (name, ))
        return name in self._names

    def __call__(self, value, *default):
        """Get the name of the enumerated value with that concrete value.

        If there is no value with that concrete value in the enumeration and a
        second argument is given as a default, this is returned. Else, a
        ValueError is raised.

        This method can be used for checking that a concrete value belongs to
        the set of concrete values in an enumerated type.

        Examples
        --------
        Let ``enum`` be an enumerated type defined as:

        >>> enum = Enum({'T0': 0, 'T1': 2, 'T2': 5})

        then:

        >>> enum(5)
        'T2'
        >>> enum(42, None) is None
        True
        >>> enum(42)
        Traceback (most recent call last):
          ...
        ValueError: no enumerated value with that concrete value: 42

        """

        try:
            return self._values[value]
        except KeyError:
            if len(default) > 0:
                return default[0]
            raise ValueError(
                "no enumerated value with that concrete value: %r" % (value, ))

    def __len__(self):
        """Return the number of enumerated values in the enumerated type.

        Examples
        --------
        >>> len(Enum(['e%d' % i for i in range(10)]))
        10

        """

        return len(self._names)

    def __iter__(self):
        """Iterate over the enumerated values.

        Enumerated values are returned as (name, value) pairs *in no particular
        order*.

        Examples
        --------
        >>> enumvals = {'red': 4, 'green': 2, 'blue': 1}
        >>> enum = Enum(enumvals)
        >>> enumdict = dict([(name, value) for (name, value) in enum])
        >>> enumvals == enumdict
        True

        """

        for name_value in self._names.iteritems():
            yield name_value

    def __eq__(self, other):
        """Is the other enumerated type equivalent to this one?

        Two enumerated types are equivalent if they have exactly the same
        enumerated values (i.e. with the same names and concrete values).

        Examples
        --------

        Let ``enum*`` be enumerated types defined as:

        >>> enum1 = Enum({'T0': 0, 'T1': 2})
        >>> enum2 = Enum(enum1)
        >>> enum3 = Enum({'T1': 2, 'T0': 0})
        >>> enum4 = Enum({'T0': 0, 'T1': 2, 'T2': 5})
        >>> enum5 = Enum({'T0': 0})
        >>> enum6 = Enum({'T0': 10, 'T1': 20})

        then:

        >>> enum1 == enum1
        True
        >>> enum1 == enum2 == enum3
        True
        >>> enum1 == enum4
        False
        >>> enum5 == enum1
        False
        >>> enum1 == enum6
        False

        Comparing enumerated types with other kinds of objects produces
        a false result:

        >>> enum1 == {'T0': 0, 'T1': 2}
        False
        >>> enum1 == ['T0', 'T1']
        False
        >>> enum1 == 2
        False

        """

        if not isinstance(other, Enum):
            return False
        return self._names == other._names

    def __ne__(self, other):
        """Is the `other` enumerated type different from this one?

        Two enumerated types are different if they don't have exactly
        the same enumerated values (i.e. with the same names and
        concrete values).

        Examples
        --------

        Let ``enum*`` be enumerated types defined as:

        >>> enum1 = Enum({'T0': 0, 'T1': 2})
        >>> enum2 = Enum(enum1)
        >>> enum3 = Enum({'T1': 2, 'T0': 0})
        >>> enum4 = Enum({'T0': 0, 'T1': 2, 'T2': 5})
        >>> enum5 = Enum({'T0': 0})
        >>> enum6 = Enum({'T0': 10, 'T1': 20})

        then:

        >>> enum1 != enum1
        False
        >>> enum1 != enum2 != enum3
        False
        >>> enum1 != enum4
        True
        >>> enum5 != enum1
        True
        >>> enum1 != enum6
        True

        """

        return not self.__eq__(other)

    # XXX: API incompatible change for PyTables 3 line
    # Overriding __eq__ blocks inheritance of __hash__ in 3.x
    # def __hash__(self):
    #    return hash((self.__class__, tuple(self._names.items())))
    def __repr__(self):
        """Return the canonical string representation of the enumeration. The
        output of this method can be evaluated to give a new enumeration object
        that will compare equal to this one.

        Examples
        --------
        >>> repr(Enum({'name': 10}))
        "Enum({'name': 10})"

        """

        return 'Enum(%s)' % self._names
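

# A hedged sketch of a typical PyTables use of ``Enum``: declaring an
# enumerated table column (the ``EnumCol`` signature is assumed from
# the PyTables column API, not shown in this fragment):
#
#     colors = Enum(['red', 'green', 'blue'])
#
#     class Particle(IsDescription):
#         color = EnumCol(colors, 'red', base='uint8')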