Ejemplo n.º 1
0
class CacheArray(NotLoggedMixin, EArray, indexesextension.CacheArray):
    """Container for keeping index caches of 1st and 2nd level."""

    # Class identifier.
    _c_classid = 'CACHEARRAY'

    # Old camelCase spelling of ``_c_classid``.
    # NOTE(review): presumably a deprecated alias kept for backward
    # compatibility -- confirm against ``previous_api_property``.
    _c_classId = previous_api_property('_c_classid')
Ejemplo n.º 2
0
class MarkG(NotLoggedMixin, Group):
    """Group holding the action storage (nodes and attributes) of a mark."""

    # Class identifier.
    _c_classid = 'MARKG'

    # Old camelCase spelling of ``_c_classid``.
    _c_classId = previous_api_property('_c_classid')

    # The import is kept at class scope so that the ``re`` class attribute
    # it creates stays available to any code that may rely on it.
    import re
    # Attribute names of the form ``a<digits>``; these are the ones that
    # `_g_reset()` removes.
    _c_shadow_name_re = re.compile(r'^a[0-9]+$')

    def _g_width_warning(self):
        """Issue a `PerformanceWarning` about too many nodes in this mark.

        The message reports this node's path name and its
        ``_v_max_group_width`` value.
        """

        # The clauses are joined with an explicit "; " separator.  The old
        # backslash-continued literal glued them together as
        # "...nodes);be ready...".
        warnings.warn(
            "mark ``%s`` is exceeding the recommended maximum action "
            "storage (%d nodes); be ready to see PyTables asking for "
            "*lots* of memory and possibly slow I/O" %
            (self._v_pathname, self._v_max_group_width), PerformanceWarning)

    _g_widthWarning = previous_api(_g_width_warning)

    def _g_reset(self):
        """Empty action storage (nodes and attributes).

        This method empties all action storage kept in this node: nodes
        and attributes.
        """

        # Remove action storage nodes.  Iterate over a snapshot because the
        # children are being removed while we loop (this matters wherever
        # ``values()`` returns a live view rather than a list).
        for child in list(self._v_children.values()):
            child._g_remove(True, True)

        # Remove action storage attributes.  The name list is copied for the
        # same reason: attributes are deleted during the iteration.
        attrs = self._v_attrs
        shname = self._c_shadow_name_re
        for attrname in attrs._v_attrnamesuser[:]:
            if shname.match(attrname):
                attrs._g__delattr(attrname)
Ejemplo n.º 3
0
class LastRowArray(NotLoggedMixin, CArray, indexesextension.LastRowArray):
    """Container for keeping the sorted values and the indices of the
    last row of an index."""

    # Class identifier.
    _c_classid = 'LASTROWARRAY'

    # Old camelCase spelling of ``_c_classid``.
    # NOTE(review): presumably a deprecated alias kept for backward
    # compatibility -- confirm against ``previous_api_property``.
    _c_classId = previous_api_property('_c_classid')
Ejemplo n.º 4
0
class TransactionGroupG(NotLoggedMixin, Group):
    """Group whose children are counted as transactions by the width
    warning below."""

    # Class identifier.
    _c_classid = 'TRANSGROUP'

    # Old camelCase spelling of ``_c_classid``.
    _c_classId = previous_api_property('_c_classid')

    def _g_width_warning(self):
        """Issue a `PerformanceWarning` about too many transactions.

        The message reports this node's ``_v_max_group_width`` value.
        """

        # The clauses are joined with an explicit "; " separator.  The old
        # backslash-continued literal glued them together as
        # "...(%d);be ready...".
        warnings.warn(
            "the number of transactions is exceeding the recommended "
            "maximum (%d); be ready to see PyTables asking for *lots* of "
            "memory and possibly slow I/O"
            % (self._v_max_group_width,), PerformanceWarning)

    _g_widthWarning = previous_api(_g_width_warning)
Ejemplo n.º 5
0
class ImageArray(Array):
    """Array containing an image.

    This class has no additional behaviour or functionality compared to
    that of an ordinary array.  It simply enables the user to open an
    ``IMAGE`` HDF5 node as a normal `Array` node in PyTables.

    """

    # Class identifier.
    _c_classid = 'IMAGE'

    # Old camelCase spelling of ``_c_classid``.
    # NOTE(review): presumably a deprecated alias kept for backward
    # compatibility -- confirm against ``previous_api_property``.
    _c_classId = previous_api_property('_c_classid')
Ejemplo n.º 6
0
class Unknown(Node):
    """Node that the underlying HDF5 library reports as *unknown*.

    Apart from what is inherited from the Node class, this class offers
    no public instance variables or methods.

    """

    # Class identifier
    _c_classid = 'UNKNOWN'

    _c_classId = previous_api_property('_c_classid')

    def __init__(self, parentnode, name):
        """Create the `Unknown` instance."""

        # Flag the node as not new before running the base initialisation.
        self._v_new = False
        super(Unknown, self).__init__(parentnode, name)

    def _g_new(self, parentnode, name, init=False):
        """Do nothing."""
        return None

    def _g_open(self):
        """Do nothing and return 0."""
        return 0

    def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs):
        """Silently skip the copy; unknown nodes are never copied."""
        return None

    def _g_delete(self, parent):
        """Do nothing."""
        return None

    def __str__(self):
        """Return ``"<pathname> (<class name>)"``."""
        return "%s (%s)" % (self._v_pathname, self.__class__.__name__)

    def __repr__(self):
        """Return the `str()` form followed by an explanatory note."""
        summary = str(self)
        return """%s
  NOTE: <The Unknown object represents a node which is reported as
         unknown by the underlying HDF5 library, but that might be
         supported in more recent HDF5 versions.>
""" % summary
Ejemplo n.º 7
0
class EArray(CArray):
    """This class represents extendable, homogeneous datasets in an HDF5 file.

    The main difference between an EArray and a CArray (see
    :ref:`CArrayClassDescr`), from which it inherits, is that the former
    can be enlarged along one of its dimensions, the *enlargeable
    dimension*.  That means that the :attr:`Leaf.extdim` attribute (see
    :class:`Leaf`) of any EArray instance will always be non-negative.
    Multiple enlargeable dimensions might be supported in the future.

    New rows can be added to the end of an enlargeable array by using the
    :meth:`EArray.append` method.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.

    atom
        An `Atom` instance representing the *type* and *shape*
        of the atomic objects to be saved.

    shape
        The shape of the new array.  One (and only one) of
        the shape dimensions *must* be 0.  The dimension being 0
        means that the resulting `EArray` object can be extended
        along it.  Multiple enlargeable dimensions are not supported
        right now.

    title
        A description for this node (it sets the ``TITLE``
        HDF5 attribute on disk).

    filters
        An instance of the `Filters` class that provides information
        about the desired I/O filters to be applied during the life
        of this object.

    expectedrows
        A user estimate about the number of row elements that will
        be added to the growable dimension in the `EArray` node.
        If not provided, the default value is ``EXPECTED_ROWS_EARRAY``
        (see ``tables/parameters.py``).  If you plan to create either
        a much smaller or a much bigger `EArray` try providing a guess;
        this will optimize the HDF5 B-Tree creation and management
        process time and the amount of memory used.

    chunkshape
        The shape of the data chunk to be read or written in a single
        HDF5 I/O operation.  Filters are applied to those chunks of data.
        The dimensionality of `chunkshape` must be the same as that of
        `shape` (beware: no dimension should be 0 this time!).
        If ``None``, a sensible value is calculated based on the
        `expectedrows` parameter (which is recommended).

    byteorder
        The byteorder of the data *on disk*, specified as 'little' or
        'big'. If this is not specified, the byteorder is that of the
        platform.

    Examples
    --------

    See below a small example of the use of the `EArray` class.  The
    code is available in ``examples/earray1.py``::

        import tables
        import numpy

        fileh = tables.open_file('earray1.h5', mode='w')
        a = tables.StringAtom(itemsize=8)

        # Use ``a`` as the object type for the enlargeable array.
        array_c = fileh.create_earray(fileh.root, 'array_c', a, (0,),
                                      "Chars")
        array_c.append(numpy.array(['a'*2, 'b'*4], dtype='S8'))
        array_c.append(numpy.array(['a'*6, 'b'*8, 'c'*10], dtype='S8'))

        # Read the string ``EArray`` we have created on disk.
        for s in array_c:
            print('array_c[%s] => %r' % (array_c.nrow, s))
        # Close the file.
        fileh.close()

    The output for the previous script is something like::

        array_c[0] => 'aa'
        array_c[1] => 'bbbb'
        array_c[2] => 'aaaaaa'
        array_c[3] => 'bbbbbbbb'
        array_c[4] => 'cccccccc'

    """

    # Class identifier.
    _c_classid = 'EARRAY'

    # Old camelCase spelling of ``_c_classid``.
    _c_classId = previous_api_property('_c_classid')

    # Special methods
    # ~~~~~~~~~~~~~~~
    def __init__(self,
                 parentnode,
                 name,
                 atom=None,
                 shape=None,
                 title="",
                 filters=None,
                 expectedrows=None,
                 chunkshape=None,
                 byteorder=None,
                 _log=True):
        """Store the expected number of rows and defer to `CArray`.

        When `expectedrows` is ``None`` the ``EXPECTED_ROWS_EARRAY`` entry
        of the parent file parameters is used instead.
        """

        # Specific of EArray
        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_EARRAY']
        # The expected number of rows to be stored in the array.
        self._v_expectedrows = expectedrows

        # Call the parent (CArray) init code
        super(EArray, self).__init__(parentnode, name, atom, shape, title,
                                     filters, chunkshape, byteorder, _log)

    # Public and private methods
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~
    def _g_create(self):
        """Create a new array in file (specific part).

        Sets ``self.extdim`` to the position of the single zero-length
        dimension of ``self.shape``.

        Raises
        ------
        NotImplementedError
            If the shape has more than one zero-length dimension.
        ValueError
            If the shape has no zero-length dimension.
        """

        # Pre-conditions and extdim computation
        zerodims = numpy.sum(numpy.array(self.shape) == 0)
        if zerodims > 0:
            if zerodims == 1:
                self.extdim = list(self.shape).index(0)
            else:
                raise NotImplementedError(
                    "Multiple enlargeable (0-)dimensions are not "
                    "supported.")
        else:
            raise ValueError("When creating EArrays, you need to set one of "
                             "the dimensions of the Atom instance to zero.")

        # Finish the common part of the creation process
        return self._g_create_common(self._v_expectedrows)

    def _check_shape_append(self, nparr):
        """Test that nparr shape is consistent with underlying EArray.

        Raises
        ------
        ValueError
            If the rank of `nparr` (atom dimensions discounted) differs from
            the rank of this array, or if any dimension other than the
            enlargeable one has a different length.
        """

        # The arrays conforms self expandibility?
        myrank = len(self.shape)
        narank = len(nparr.shape) - len(self.atom.shape)
        if myrank != narank:
            raise ValueError(("the ranks of the appended object (%d) and the "
                              "``%s`` EArray (%d) differ") %
                             (narank, self._v_pathname, myrank))
        for i in range(myrank):
            if i != self.extdim and self.shape[i] != nparr.shape[i]:
                raise ValueError(("the shapes of the appended object and the "
                                  "``%s`` EArray differ in non-enlargeable "
                                  "dimension %d") % (self._v_pathname, i))

    _checkShapeAppend = previous_api(_check_shape_append)

    def append(self, sequence):
        """Add a sequence of data to the end of the dataset.

        The sequence must have the same type as the array; otherwise a
        TypeError is raised. In the same way, the dimensions of the
        sequence must conform to the shape of the array, that is, all
        dimensions must match, with the exception of the enlargeable
        dimension, which can be of any length (even 0!).  If the shape
        of the sequence is invalid, a ValueError is raised.

        """

        self._g_check_open()
        self._v_file._check_writable()

        # Convert the sequence into a NumPy object
        nparr = convert_to_np_atom2(sequence, self.atom)
        # Check if it has a consistent shape with underlying EArray
        self._check_shape_append(nparr)
        # If the size of the nparr is zero, don't do anything else
        if nparr.size > 0:
            self._append(nparr)

    def _g_copy_with_stats(self, group, name, start, stop, step, title,
                           filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Creates a new `EArray` called `name` under `group` and fills it
        with the rows selected by `start`, `stop` and `step`, reading the
        source in buffers of ``self.nrowsinbuf`` rows.

        Returns
        -------
        tuple
            ``(new_array, nbytes)``, where ``nbytes`` is the product of
            ``self.shape`` times the atom item size.
        """

        (start, stop, step) = self._process_range_read(start, stop, step)
        # The new array starts out empty along the main dimension
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = 0
        # The number of final rows
        nrows = len(xrange(start, stop, step))
        # Build the new EArray object
        new_earray = EArray(group,
                            name,
                            atom=self.atom,
                            shape=shape,
                            title=title,
                            filters=filters,
                            expectedrows=nrows,
                            chunkshape=chunkshape,
                            _log=_log)
        # Now, fill the new earray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        try:
            # Start the copy itself
            for start2 in xrange(start, stop, step * nrowsinbuf):
                # Never read past the requested stop
                stop2 = min(start2 + step * nrowsinbuf, stop)
                # Set the proper slice in the extensible dimension
                slices[maindim] = slice(start2, stop2, step)
                # Save the records on disk
                new_earray._append(self.__getitem__(tuple(slices)))
        finally:
            # Activate the conversion again (default), even when the copy
            # fails half-way; otherwise later reads from this array would
            # silently keep skipping conversions.
            self._v_convert = True
        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.itemsize

        return (new_earray, nbytes)

    _g_copyWithStats = previous_api(_g_copy_with_stats)
Ejemplo n.º 8
0
class VLArray(hdf5extension.VLArray, Leaf):
    """This class represents variable length (ragged) arrays in an HDF5 file.

    Instances of this class represent array objects in the object tree
    with the property that their rows can have a *variable* number of
    homogeneous elements, called *atoms*. Like Table datasets (see
    :ref:`TableClassDescr`), variable length arrays can have only one
    dimension, and the elements (atoms) of their rows can be fully
    multidimensional.

    When reading a range of rows from a VLArray, you will *always* get
    a Python list of objects of the current flavor (each of them for a
    row), which may have different lengths.

    This class provides methods to write or read data to or from
    variable length array objects in the file. Note that it also
    inherits all the public attributes and methods that Leaf (see
    :ref:`LeafClassDescr`) already provides.

    .. note::

          VLArray objects also support compression although compression
          is only performed on the data structures used internally by
          the HDF5 to take references of the location of the variable
          length data. Data itself (the raw data) are not compressed
          or filtered.

          Please refer to the `VLTypes Technical Note
          <http://www.hdfgroup.org/HDF5/doc/TechNotes/VLTypes.html>`_
          for more details on the topic.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An `Atom` instance representing the *type* and *shape* of the atomic
        objects to be saved.
    title
        A description for this node (it sets the ``TITLE`` HDF5 attribute on
        disk).
    filters
        An instance of the `Filters` class that provides information about the
        desired I/O filters to be applied during the life of this object.
    expectedrows
        A user estimate about the number of row elements that will
        be added to the growable dimension in the `VLArray` node.
        If not provided, the default value is ``EXPECTED_ROWS_VLARRAY``
        (see ``tables/parameters.py``).  If you plan to create either
        a much smaller or a much bigger `VLArray` try providing a guess;
        this will optimize the HDF5 B-Tree creation and management
        process time and the amount of memory used.

        .. versionadded:: 3.0

    chunkshape
        The shape of the data chunk to be read or written in a single HDF5 I/O
        operation.  Filters are applied to those chunks of data.  The
        dimensionality of `chunkshape` must be 1.  If ``None``, a sensible
        value is calculated (which is recommended).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or 'big'.
        If this is not specified, the byteorder is that of the platform.

    .. versionchanged:: 3.0
       The *expectedsizeinMB* parameter has been replaced by *expectedrows*.

    Examples
    --------
    See below a small example of the use of the VLArray class.  The code is
    available in :file:`examples/vlarray1.py`::

        import tables
        from numpy import *

        # Create a VLArray:
        fileh = tables.open_file('vlarray1.h5', mode='w')
        vlarray = fileh.create_vlarray(fileh.root, 'vlarray1',
        tables.Int32Atom(shape=()),
                        "ragged array of ints",
                        filters=tables.Filters(1))

        # Append some (variable length) rows:
        vlarray.append(array([5, 6]))
        vlarray.append(array([5, 6, 7]))
        vlarray.append([5, 6, 9, 8])

        # Now, read it through an iterator:
        print('-->', vlarray.title)
        for x in vlarray:
            print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, x))

        # Now, do the same with native Python strings.
        vlarray2 = fileh.create_vlarray(fileh.root, 'vlarray2',
        tables.StringAtom(itemsize=2),
                            "ragged array of strings",
                            filters=tables.Filters(1))
        vlarray2.flavor = 'python'

        # Append some (variable length) rows:
        print('-->', vlarray2.title)
        vlarray2.append(['5', '66'])
        vlarray2.append(['5', '6', '77'])
        vlarray2.append(['5', '6', '9', '88'])

        # Now, read it through an iterator:
        for x in vlarray2:
            print('%s[%d]--> %s' % (vlarray2.name, vlarray2.nrow, x))

        # Close the file.
        fileh.close()

    The output for the previous script is something like::

        --> ragged array of ints
        vlarray1[0]--> [5 6]
        vlarray1[1]--> [5 6 7]
        vlarray1[2]--> [5 6 9 8]
        --> ragged array of strings
        vlarray2[0]--> ['5', '66']
        vlarray2[1]--> ['5', '6', '77']
        vlarray2[2]--> ['5', '6', '9', '88']


    .. rubric:: VLArray attributes

    The instance variables below are provided in addition to those in
    Leaf (see :ref:`LeafClassDescr`).

    .. attribute:: atom

        An Atom (see :ref:`AtomClassDescr`)
        instance representing the *type* and
        *shape* of the atomic objects to be
        saved. You may use a *pseudo-atom* for
        storing a serialized object or variable length string per row.

    .. attribute:: flavor

        The type of data object read from this leaf.

        Please note that when reading several rows of VLArray data,
        the flavor only applies to the *components* of the returned
        Python list, not to the list itself.

    .. attribute:: nrow

        On iterators, this is the index of the current row.

    .. attribute:: nrows

        The current number of rows in the array.

    .. attribute:: extdim

       The index of the enlargeable dimension (always 0 for vlarrays).

    """

    # Class identifier.
    _c_classid = 'VLARRAY'

    # Old camelCase spelling of ``_c_classid``.
    _c_classId = previous_api_property('_c_classid')

    # Lazy read-only attributes
    # `````````````````````````
    @lazyattr
    def dtype(self):
        """The NumPy ``dtype`` that most closely matches this array."""
        return self.atom.dtype

    # Properties
    # ~~~~~~~~~~
    # Read-only; always a 1-tuple holding the current number of rows.
    shape = property(lambda self: (self.nrows, ), None, None,
                     "The shape of the stored array.")

    def _get_size_on_disk(self):
        """Getter for `size_on_disk`; always raises NotImplementedError."""
        raise NotImplementedError('size_on_disk not implemented for VLArrays')

    # Read-only; every access raises (see the getter above).
    size_on_disk = property(
        _get_size_on_disk, None, None, """
        The HDF5 library does not include a function to determine size_on_disk
        for variable-length arrays.  Accessing this attribute will raise a
        NotImplementedError.
        """)

    # Read-only; delegates to ``self._get_memory_size()``.
    size_in_memory = property(
        lambda self: self._get_memory_size(), None, None, """
        The size of this array's data in bytes when it is fully loaded
        into memory.

        .. note::

            When data is stored in a VLArray using the ObjectAtom type,
            it is first serialized using pickle, and then converted to
            a NumPy array suitable for storage in an HDF5 file.
            This attribute will return the size of that NumPy
            representation.  If you wish to know the size of the Python
            objects after they are loaded from disk, you can use this
            `ActiveState recipe
            <http://code.activestate.com/recipes/577504/>`_.
        """)

    # Other methods
    # ~~~~~~~~~~~~~
    def __init__(self,
                 parentnode,
                 name,
                 atom=None,
                 title="",
                 filters=None,
                 expectedrows=None,
                 chunkshape=None,
                 byteorder=None,
                 _log=True):
        """Initialise the instance state and defer to the parent class.

        The node is treated as new when `atom` is given.  For new nodes
        `chunkshape` may be an integer or a one-element sequence.

        Raises
        ------
        TypeError
            If `chunkshape` is neither an integer nor a sequence.
        ValueError
            If `chunkshape` does not have exactly one element.
        """

        # The object version of this array.
        self._v_version = None

        # Is this the first time the node has been created?
        is_new = atom is not None
        self._v_new = is_new

        # New title for this node.
        self._v_new_title = title

        # New filter properties for this array.
        self._v_new_filters = filters

        # The expected number of rows to be stored in the array
        # (added in version 3.0).
        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_VLARRAY']
        self._v_expectedrows = expectedrows

        # Private storage for the `chunkshape` property of Leaf.
        self._v_chunkshape = None

        # Miscellaneous iteration rubbish: start, stop and step of the
        # current iteration.
        self._start = self._stop = self._step = None

        # Number of rows read up to the current state of iteration.
        self._nrowsread = None

        # Starting and stopping rows for the current buffer.
        self._startb = self._stopb = None

        # Current row in iterators (sentinel).
        self._row = None

        # Whether we are in the middle of an iteration or not (sentinel).
        self._init = False

        # Current buffer in iterators.
        self.listarr = None

        # Documented (*public*) attributes.

        # An Atom (see :ref:`AtomClassDescr`) instance representing the
        # *type* and *shape* of the atomic objects to be saved.  It may be
        # a *pseudo-atom* for storing a serialized object or variable
        # length string per row.
        self.atom = atom

        # On iterators, this is the index of the current row.
        self.nrow = None

        # The current number of rows in the array.
        self.nrows = None

        # The index of the enlargeable dimension (always 0 for vlarrays,
        # which only have one dimension currently).
        self.extdim = 0

        # Check the chunkshape parameter
        if is_new and chunkshape is not None:
            scalar_types = (int, numpy.integer, long)
            if isinstance(chunkshape, scalar_types):
                chunkshape = (chunkshape, )
            try:
                chunkshape = tuple(chunkshape)
            except TypeError:
                raise TypeError(
                    "`chunkshape` parameter must be an integer or sequence "
                    "and you passed a %s" % type(chunkshape))
            if len(chunkshape) != 1:
                raise ValueError("`chunkshape` rank (length) must be 1: %r" %
                                 (chunkshape, ))
            self._v_chunkshape = tuple(map(SizeType, chunkshape))

        super(VLArray, self).__init__(parentnode, name, is_new, filters,
                                      byteorder, _log)

    def _g_post_init_hook(self):
        """Run the inherited post-init hook, then set the buffer size."""
        super(VLArray, self)._g_post_init_hook()
        # Number of rows per buffer; assigned after the inherited hook has
        # run.
        self.nrowsinbuf = 100  # maybe enough for most applications

    # This is too specific for moving it into Leaf
    def _calc_chunkshape(self, expectedrows):
        """Calculate the size for the HDF5 chunk.

        Returns a 1-tuple with the number of base elements per chunk
        (at least 1).
        """

        # For HDF5 VL types the chunkshape has to be computed from the
        # item size of *each* element of the atom, not from the size of
        # the entire atom (i.e. not ``self.atom.atomsize()``).  The reason
        # is unclear; it may be worth reporting to the HDF5 list.
        # F. Alted 2006-11-23
        itemsize = self._basesize

        # AV 2013-05-03
        # This is just a quick workaround that allowed changing the API for
        # the PyTables 3.0 release and removing the expected_mb parameter.
        # The algorithm for computing the chunkshape should be rewritten as
        # requested by gh-35.
        megabytes = expectedrows * itemsize / 1024.**2

        # Number of elements that fit in the computed chunk size
        nitems = calc_chunksize(megabytes) // itemsize

        # Safeguard against itemsizes being extremely large: a result of
        # zero is bumped to one.
        return (SizeType(nitems or 1), )

    def _g_create(self):
        """Create a variable length array (ragged array).

        Returns the object identifier given by ``self._create_array()``.
        Raises ValueError when the atom shape has a zero-length dimension.
        """

        atom = self.atom
        self._v_version = obversion

        # Zero dims in the atom shape are not allowed in VLArrays
        if numpy.sum(numpy.array(atom.shape) == 0) > 0:
            raise ValueError("When creating VLArrays, none of the dimensions "
                             "of the Atom instance can be zero.")

        # A pseudo-atom has no ``size`` of its own; its base atom provides
        # the storage details instead.
        is_pseudo = not hasattr(atom, 'size')
        storage_atom = atom.base if is_pseudo else atom
        self._atomicdtype = storage_atom.dtype
        self._atomicsize = storage_atom.size
        self._basesize = storage_atom.itemsize
        self._atomictype = atom.type
        self._atomicshape = atom.shape

        # Compute the optimal chunkshape, if needed
        if self._v_chunkshape is None:
            self._v_chunkshape = self._calc_chunkshape(self._v_expectedrows)

        # No rows at creation time
        self.nrows = SizeType(0)

        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(atom.type, sys.byteorder)

        # ``self._v_objectid`` has to be set right after creating the
        # vlarray, because setting attributes afterwards needs it.
        object_id = self._create_array(self._v_new_title)
        self._v_objectid = object_id

        # Record the pseudo-atom kind in an attribute so that the proper
        # class can be retrieved after a re-opening operation.
        if is_pseudo:
            self.attrs.PSEUDOATOM = atom.kind

        return self._v_objectid

    def _g_open(self):
        """Get the metadata info for an array in file."""

        self._v_objectid, self.nrows, self._v_chunkshape, atom = \
            self._open_array()

        # Check if the atom can be a PseudoAtom
        if "PSEUDOATOM" in self.attrs:
            kind = self.attrs.PSEUDOATOM
            if kind == 'vlstring':
                atom = VLStringAtom()
            elif kind == 'vlunicode':
                atom = VLUnicodeAtom()
            elif kind == 'object':
                atom = ObjectAtom()
            else:
                raise ValueError("pseudo-atom name ``%s`` not known." % kind)
        elif self._v_file.format_version[:1] == "1":
            flavor1x = self.attrs.FLAVOR
            if flavor1x == "VLString":
                atom = VLStringAtom()
            elif flavor1x == "Object":
                atom = ObjectAtom()

        self.atom = atom
        return self._v_objectid

    def _getnobjects(self, nparr):
        """Return the number of objects in a NumPy array."""

        # Check for zero dimensionality array
        zerodims = numpy.sum(numpy.array(nparr.shape) == 0)
        if zerodims > 0:
            # No objects to be added
            return 0
        shape = nparr.shape
        atom_shape = self.atom.shape
        shapelen = len(nparr.shape)
        if isinstance(atom_shape, tuple):
            atomshapelen = len(self.atom.shape)
        else:
            atom_shape = (self.atom.shape, )
            atomshapelen = 1
        diflen = shapelen - atomshapelen
        if shape == atom_shape:
            nobjects = 1
        elif (diflen == 1 and shape[diflen:] == atom_shape):
            # Check if the leading dimensions are all ones
            # if shape[:diflen-1] == (1,)*(diflen-1):
            #    nobjects = shape[diflen-1]
            #    shape = shape[diflen:]
            # It's better to accept only inputs with the exact dimensionality
            # i.e. a dimensionality only 1 element larger than atom
            nobjects = shape[0]
            shape = shape[1:]
        elif atom_shape == (1, ) and shapelen == 1:
            # Case where shape = (N,) and shape_atom = 1 or (1,)
            nobjects = shape[0]
        else:
            raise ValueError("The object '%s' is composed of elements with "
                             "shape '%s', which is not compatible with the "
                             "atom shape ('%s')." % (nparr, shape, atom_shape))
        return nobjects

    def get_enum(self):
        """Get the enumerated type associated with this array.

        If this array is of an enumerated type, the corresponding Enum instance
        (see :ref:`EnumClassDescr`) is returned. If it is not of an enumerated
        type, a TypeError is raised.

        """

        if self.atom.kind != 'enum':
            raise TypeError("array ``%s`` is not of an enumerated type" %
                            self._v_pathname)

        return self.atom.enum

    getEnum = previous_api(get_enum)

    def append(self, sequence):
        """Add a sequence of data to the end of the dataset.

        This method appends the objects in the sequence to a *single row* in
        this array. The type and shape of individual objects must be compliant
        with the atoms in the array. In the case of serialized objects and
        variable length strings, the object or string to append is itself the
        sequence.

        """

        self._g_check_open()
        self._v_file._check_writable()

        # Prepare the sequence to convert it into a NumPy object
        atom = self.atom
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            sequence = atom.toarray(sequence)
            statom = atom.base
        else:
            try:  # fastest check in most cases
                len(sequence)
            except TypeError:
                raise TypeError("argument is not a sequence")
            statom = atom

        if len(sequence) > 0:
            # The sequence needs to be copied to make the operation safe
            # to in-place conversion.
            nparr = convert_to_np_atom2(sequence, statom)
            nobjects = self._getnobjects(nparr)
        else:
            nobjects = 0
            nparr = None

        self._append(nparr, nobjects)
        self.nrows += 1

    def iterrows(self, start=None, stop=None, step=None):
        """Return an iterator over a range of rows of the array.

        Every selected row is produced as an object of the current flavor.

        When no range is given, *all the rows* in the array are visited
        (the :meth:`VLArray.__iter__` special method can be used for the
        same purpose).  Use the start, stop and step parameters to restrict
        the iteration to a given *range of rows*.

        Examples
        --------

        ::

            for row in vlarray.iterrows(step=4):
                print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, row))

        .. versionchanged:: 3.0
           If the *start* parameter is provided and *stop* is None then the
           array is iterated from *start* to the last line.
           In PyTables < 3.0 only one element was returned.

        """

        # Resolve the requested range and keep it as the iterator state.
        bounds = self._process_range(start, stop, step)
        self._start, self._stop, self._step = bounds
        self._init_loop()
        # The array object itself acts as the iterator.
        return self

    def __iter__(self):
        """Iterate over the rows of the array.

        This is equivalent to calling :meth:`VLArray.iterrows` with default
        arguments, i.e. it iterates over *all the rows* in the array.

        Examples
        --------

        ::

            result = [row for row in vlarray]

        Which is equivalent to::

            result = [row for row in vlarray.iterrows()]

        """

        if not self._init:
            # If the iterator is called directly, assign default variables
            self._start = 0
            self._stop = self.nrows
            self._step = 1
            # and initialize the loop
            self._init_loop()

        return self

    def _init_loop(self):
        """Reset the iteration state used by ``__iter__`` and ``next``."""

        first = self._start
        # Mark the iteration as started, with no row buffered yet (-1).
        self._init = True  # Sentinel
        self._row = -1  # Sentinel
        # Both the read counter and the next buffer begin at the first row.
        self._nrowsread = first
        self._startb = first
        # One step before the first row; ``next`` adds a step to it before
        # returning each row.
        self.nrow = SizeType(first - self._step)  # row number

    _initLoop = previous_api(_init_loop)

    def next(self):
        """Get the next element of the array during an iteration.

        The element is returned as a list of objects of the current
        flavor.

        Rows are fetched through ``self.read`` in buffers of at most
        ``self.nrowsinbuf`` rows.  A buffer never extends past the stop
        row of the iteration, and the last buffer is released once the
        iteration is exhausted (same behaviour as ``Array.next``).

        Raises
        ------
        StopIteration
            When all the rows in the selected range have been returned.

        """

        if self._nrowsread >= self._stop:
            self._init = False
            # Do not keep the last buffer alive after the iteration ends.
            self.listarr = None
            raise StopIteration  # end of iteration

        # Read a chunk of rows when the buffer is empty or used up
        if self._row + 1 >= self.nrowsinbuf or self._row < 0:
            self._stopb = self._startb + self._step * self.nrowsinbuf
            # Protection for reading more rows than needed
            if self._stopb > self._stop:
                self._stopb = self._stop
            self.listarr = self.read(self._startb, self._stopb, self._step)
            self._row = -1
            self._startb = self._stopb
        self._row += 1
        self.nrow += self._step
        self._nrowsread += self._step
        return self.listarr[self._row]

    def __getitem__(self, key):
        """Get a row or a range of rows from the array.

        If key argument is an integer, the corresponding array row is returned
        as an object of the current flavor.  If key is a slice, the range of
        rows determined by it is returned as a list of objects of the current
        flavor.

        In addition, NumPy-style point selections are supported.  In
        particular, if key is a list of row coordinates, the set of rows
        determined by it is returned.  Furthermore, if key is an array of
        boolean values, only the coordinates where key is True are returned.
        Note that for the latter to work it is necessary that key list would
        contain exactly as many rows as the array has.

        Raises
        ------
        IndexError
            If an integer key falls outside ``[-nrows, nrows)``, or if the
            key is not an integer, a slice, a list, a tuple or a NumPy
            array.

        Examples
        --------

        ::

            a_row = vlarray[4]
            a_list = vlarray[4:1000:2]
            a_list2 = vlarray[[0,2]]   # get list of coords
            a_list3 = vlarray[[0,-2]]  # negative values accepted
            a_list4 = vlarray[numpy.array([True,...,False])]  # array of bools

        """

        self._g_check_open()
        if is_idx(key):
            key = operator.index(key)
            nrows = self.nrows

            # Index out of range protection
            if key >= nrows:
                raise IndexError("Index out of range")
            if key < 0:
                # To support negative values
                key += nrows
                if key < 0:
                    # Still negative: the key was below -nrows.  Without
                    # this check it would be handed on as a (negative)
                    # range start instead of being rejected.
                    raise IndexError("Index out of range")
            (start, stop, step) = self._process_range(key, key + 1, 1)
            return self.read(start, stop, step)[0]
        elif isinstance(key, slice):
            start, stop, step = self._process_range(key.start, key.stop,
                                                    key.step)
            return self.read(start, stop, step)
        # Try with a boolean or point selection
        elif type(key) in (list, tuple) or isinstance(key, numpy.ndarray):
            coords = self._point_selection(key)
            return self._read_coordinates(coords)
        else:
            raise IndexError("Invalid index or slice: %r" % (key, ))

    def _assign_values(self, coords, values):
        """Assign the `values` to the positions stated in `coords`.

        `coords` and `values` are walked in lockstep; each value is
        converted to a NumPy object compatible with the atom, copied over
        the row currently stored at that coordinate and written back with
        ``self._modify``.

        Raises
        ------
        IndexError
            If a coordinate falls outside ``[-nrows, nrows)``.
        ValueError
            If a value has more elements than the row it replaces, or if
            it cannot be copied into that row.

        """

        # The atom does not depend on the row: resolve it only once.
        atom = self.atom
        is_pseudo = not hasattr(atom, 'size')  # it is a pseudo-atom
        statom = atom.base if is_pseudo else atom

        for nrow, value in zip(coords, values):
            nrows = self.nrows
            if nrow >= nrows:
                raise IndexError("First index out of range")
            if nrow < 0:
                # To support negative values
                nrow += nrows
                if nrow < 0:
                    # Still negative: the coordinate was below -nrows.
                    raise IndexError("First index out of range")

            # Prepare the object to convert it into a NumPy object
            object_ = atom.toarray(value) if is_pseudo else value
            value = convert_to_np_atom(object_, statom)
            # The returned count is not needed here (the stored row length
            # is what matters below).  The call is kept because it was part
            # of the original flow -- presumably it also validates the
            # shape of `value`; TODO confirm.
            self._getnobjects(value)

            # Get the previous value
            nrow = idx2long(nrow)  # To convert any possible numpy scalar value
            nparr = self._read_array(nrow, nrow + 1, 1)[0]
            nobjects = len(nparr)
            if len(value) > nobjects:
                raise ValueError("Length of value (%s) is larger than number "
                                 "of elements in row (%s)" %
                                 (len(value), nobjects))
            try:
                nparr[:] = value
            except Exception as exc:  # XXX
                raise ValueError("Value parameter:\n'%r'\n"
                                 "cannot be converted into an array object "
                                 "compliant vlarray[%s] row: \n'%r'\n"
                                 "The error was: <%s>" %
                                 (value, nrow, nparr[:], exc))

            # Empty rows have nothing to be written back.
            if nparr.size > 0:
                self._modify(nrow, nparr, nobjects)

    def __setitem__(self, key, value):
        """Set a row, or set of rows, in the array.

        The action taken depends on the type of the *key* parameter: if
        it is an integer, the corresponding array row is set to *value*.
        If *key* is a slice, the rows selected by it are set to the
        successive items of *value* (a sequence of rows).

        In addition, NumPy-style point selections are supported.  In
        particular, if key is a list of row coordinates, the set of rows
        determined by it is set to value.  Furthermore, if key is an array of
        boolean values, only the coordinates where key is True are set to
        values from value.  Note that for the latter to work it is necessary
        that key list would contain exactly as many rows as the array has.

        .. note::

            When updating the rows of a VLArray object which uses a
            pseudo-atom, there is a problem: you can only update values
            with *exactly* the same size in bytes than the original row.
            This is very difficult to meet with object pseudo-atoms,
            because :mod:`pickle` applied on a Python object does not
            guarantee to return the same number of bytes than over another
            object, even if they are of the same class.
            This effectively limits the kinds of objects than can be
            updated in variable-length arrays.

        Examples
        --------

        ::

            vlarray[0] = vlarray[0] * 2 + 3
            vlarray[99] = arange(96) * 2 + 3

            # Negative values for the index are supported.
            vlarray[-99] = vlarray[5] * 2 + 3
            vlarray[1:30:2] = list_of_rows
            vlarray[[1,3]] = new_1_and_3_rows

        """

        self._g_check_open()
        self._v_file._check_writable()

        if is_idx(key):
            # Wrap the single index and its value so that the row-wise
            # assignment below handles every kind of key uniformly.
            coords, value = [key], [value]
        elif isinstance(key, slice):
            first, last, stride = self._process_range(key.start, key.stop,
                                                      key.step)
            coords = range(first, last, stride)
        # Try with a boolean or point selection
        elif type(key) in (list, tuple) or isinstance(key, numpy.ndarray):
            coords = self._point_selection(key)
        else:
            raise IndexError("Invalid index or slice: %r" % (key, ))

        # Do the assignment row by row
        self._assign_values(coords, value)

    # Accessor for the _read_array method in superclass
    def read(self, start=None, stop=None, step=1):
        """Get data in the array as a list of objects of the current flavor.

        Because the rows have variable lengths, the result is always a
        *Python list* (never an array of the current flavor) holding one
        entry per row selected by the range parameters.

        The start, stop and step parameters can be used to select only a
        *range of rows* in the array.  Their meanings are the same as in
        the built-in range() Python function, except that negative values
        of step are not allowed yet. Moreover, if only start is specified,
        then stop will be set to start + 1. If you do not specify neither
        start nor stop, then *all the rows* in the array are selected.

        """

        self._g_check_open()
        start, stop, step = self._process_range_read(start, stop, step)
        # An empty range needs no access to the underlying array.
        raw_rows = [] if start == stop else self._read_array(start, stop,
                                                             step)

        atom = self.atom
        if hasattr(atom, 'size'):
            # Regular atom: convert every row to the right flavor
            flavor = self.flavor
            return [internal_to_flavor(row, flavor) for row in raw_rows]

        # Pseudo-atom: let the atom rebuild each object from its array
        return [atom.fromarray(row) for row in raw_rows]

    def _read_coordinates(self, coords):
        """Return the list of rows located at the given `coords`."""
        # Each coordinate is read on its own through ``read`` and the
        # single row is taken out of the one-element list it returns.
        return [self.read(long(coord))[0] for coord in coords]

    def _g_copy_with_stats(self, group, name, start, stop, step, title,
                           filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Creates a new VLArray called `name` under `group`, using the atom
        of this array, and appends to it the rows selected by `start`,
        `stop` and `step`.  Returns a ``(new_vlarray, nbytes)`` tuple,
        where `nbytes` is the number of copied objects multiplied by the
        size of the (base) atom.

        """

        # Build the new VLArray object
        target = VLArray(group,
                         name,
                         self.atom,
                         title=title,
                         filters=filters,
                         expectedrows=self._v_expectedrows,
                         chunkshape=chunkshape,
                         _log=_log)

        # The copy is not buffered: the length of each record cannot be
        # foreseen, so rows are moved one at a time.  Some analysis could
        # be done in the future in order to buffer the copy process.
        nrowsinbuf = 1
        (start, stop, step) = self._process_range_read(start, stop, step)
        stride = step * nrowsinbuf

        # Pseudo-atoms have no size of their own: use their base atom's one.
        src_atom = self.atom
        if hasattr(src_atom, 'size'):
            atomsize = src_atom.size
        else:
            atomsize = src_atom.base.size

        # Optimized version (no conversions, no type and shape checks, etc...)
        nrowscopied = SizeType(0)
        nbytes = 0
        for first in xrange(start, stop, stride):
            # Never read beyond the end of the requested range
            last = min(first + stride, stop)
            row = self._read_array(start=first, stop=last, step=step)[0]
            nobjects = row.shape[0]
            # Save the record on disk
            target._append(row, nobjects)
            nbytes += nobjects * atomsize
            nrowscopied += 1
        target.nrows = nrowscopied
        return (target, nbytes)

    _g_copyWithStats = previous_api(_g_copy_with_stats)

    def __repr__(self):
        """This provides more metainfo in addition to standard __str__"""

        return """%s
  atom = %r
  byteorder = %r
  nrows = %s
  flavor = %r""" % (self, self.atom, self.byteorder, self.nrows, self.flavor)
Ejemplo n.º 9
0
class CArray(Array):
    """This class represents homogeneous datasets in an HDF5 file.

    The difference between a CArray and a normal Array (see
    :ref:`ArrayClassDescr`), from which it inherits, is that a CArray
    has a chunked layout and, as a consequence, it supports compression.
    You can use datasets of this class to easily save or load arrays to
    or from disk, with compression support included.

    CArray includes all the instance variables and methods of Array.
    Only those with different behavior are mentioned here.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
       An `Atom` instance representing the *type* and *shape* of
       the atomic objects to be saved.

    shape
       The shape of the new array.

    title
       A description for this node (it sets the ``TITLE`` HDF5
       attribute on disk).

    filters
       An instance of the `Filters` class that provides
       information about the desired I/O filters to be applied
       during the life of this object.

    chunkshape
       The shape of the data chunk to be read or written in a
       single HDF5 I/O operation.  Filters are applied to those
       chunks of data.  The dimensionality of `chunkshape` must
       be the same as that of `shape`.  If ``None``, a sensible
       value is calculated (which is recommended).

    byteorder
        The byteorder of the data *on disk*, specified as 'little'
        or 'big'.  If this is not specified, the byteorder is that
        of the platform.

    Examples
    --------

    See below a small example of the use of the `CArray` class.
    The code is available in ``examples/carray1.py``::

        import numpy
        import tables

        fileName = 'carray1.h5'
        shape = (200, 300)
        atom = tables.UInt8Atom()
        filters = tables.Filters(complevel=5, complib='zlib')

        h5f = tables.open_file(fileName, 'w')
        ca = h5f.create_carray(h5f.root, 'carray', atom, shape,
                               filters=filters)

        # Fill a hyperslab in ``ca``.
        ca[10:60, 20:70] = numpy.ones((50, 50))
        h5f.close()

        # Re-open and read another hyperslab
        h5f = tables.open_file(fileName)
        print(h5f)
        print(h5f.root.carray[8:12, 18:22])
        h5f.close()

    The output for the previous script is something like::

        carray1.h5 (File) ''
        Last modif.: 'Thu Apr 12 10:15:38 2007'
        Object Tree:
        / (RootGroup) ''
        /carray (CArray(200, 300), shuffle, zlib(5)) ''

        [[0 0 0 0]
         [0 0 0 0]
         [0 0 1 1]
         [0 0 1 1]]

    """

    # Class identifier.
    _c_classid = 'CARRAY'

    _c_classId = previous_api_property('_c_classid')

    # Properties
    # ~~~~~~~~~~
    # Special methods
    # ~~~~~~~~~~~~~~~
    def __init__(self,
                 parentnode,
                 name,
                 atom=None,
                 shape=None,
                 title="",
                 filters=None,
                 chunkshape=None,
                 byteorder=None,
                 _log=True):
        """Set up the instance state and validate `atom`/`shape`/`chunkshape`.

        The node is considered new when `atom` is not None; only in that
        case are `atom`, `shape` and `chunkshape` validated.

        Raises
        ------
        ValueError
            If `atom` is not an `Atom` instance, if `shape` is None, if the
            ranks of `shape` and `chunkshape` differ, or if `chunkshape`
            has a dimension smaller than 1.
        TypeError
            If `shape` or `chunkshape` cannot be converted into a tuple.

        """

        self.atom = atom
        """An `Atom` instance representing the shape, type of the atomic
        objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.extdim = -1  # `CArray` objects are not enlargeable by default
        """The index of the enlargeable dimension."""

        # Other private attributes
        self._v_version = None
        """The object version of this array."""
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""
        self._v_chunkshape = chunkshape
        """Private storage for the `chunkshape` property of the leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer. """
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        if new:
            if not isinstance(atom, Atom):
                raise ValueError("atom parameter should be an instance of "
                                 "tables.Atom and you passed a %s." %
                                 type(atom))
            if shape is None:
                raise ValueError("you must specify a non-empty shape")
            try:
                shape = tuple(shape)
            except TypeError:
                raise TypeError("`shape` parameter must be a sequence "
                                "and you passed a %s" % type(shape))
            self.shape = tuple(SizeType(s) for s in shape)

            if chunkshape is not None:
                try:
                    chunkshape = tuple(chunkshape)
                except TypeError:
                    raise TypeError(
                        "`chunkshape` parameter must be a sequence "
                        "and you passed a %s" % type(chunkshape))
                if len(shape) != len(chunkshape):
                    raise ValueError("the shape (%s) and chunkshape (%s) "
                                     "ranks must be equal." %
                                     (shape, chunkshape))
                elif min(chunkshape) < 1:
                    raise ValueError("chunkshape parameter cannot have "
                                     "zero-dimensions.")
                self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        # The `Array` class is not abstract enough! :(
        # ``super(Array, self)`` deliberately skips ``Array.__init__`` and
        # continues with the next class after ``Array`` in the MRO.
        super(Array, self).__init__(parentnode, name, new, filters, byteorder,
                                    _log)

    def _g_create(self):
        """Create a new array in file (specific part).

        Raises ValueError if any dimension of the shape is smaller than 1.
        """

        if min(self.shape) < 1:
            raise ValueError("shape parameter cannot have zero-dimensions.")
        # Finish the common part of creation process
        return self._g_create_common(self.nrows)

    def _g_create_common(self, expectedrows):
        """Create a new array in file (common part).

        Fills in the chunkshape (when not given), the buffer size and the
        byteorder (when not given), then creates the array on disk and
        returns its object identifier.
        """

        self._v_version = obversion

        if self._v_chunkshape is None:
            # Compute the optimal chunk size
            self._v_chunkshape = self._calc_chunkshape(expectedrows,
                                                       self.rowsize,
                                                       self.atom.size)
        # Compute the optimal nrowsinbuf
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(self.atom.type, sys.byteorder)

        try:
            # ``self._v_objectid`` needs to be set because would be
            # needed for setting attributes in some descendants later
            # on
            self._v_objectid = self._create_carray(self._v_new_title)
        except:  # XXX
            # Problems creating the Array on disk. Close node and re-raise.
            self.close(flush=0)
            raise

        return self._v_objectid

    def _g_copy_with_stats(self, group, name, start, stop, step, title,
                           filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Creates a new CArray called `name` under `group` and fills it, one
        buffer of ``self.nrowsinbuf`` rows at a time, with the rows of the
        main dimension selected by `start`, `stop` and `step`.

        Returns a ``(new_carray, nbytes)`` tuple.

        Data conversion is switched off on this array while copying; it is
        switched back on even if the copy fails.

        """

        (start, stop, step) = self._process_range_read(start, stop, step)
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = len(xrange(0, stop - start, step))
        # Now, fill the new carray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        try:
            # Build the new CArray object
            new_carray = CArray(group,
                                name,
                                atom=self.atom,
                                shape=shape,
                                title=title,
                                filters=filters,
                                chunkshape=chunkshape,
                                _log=_log)
            # Start the copy itself
            for start2 in xrange(start, stop, step * nrowsinbuf):
                # Save the records on disk
                stop2 = start2 + step * nrowsinbuf
                if stop2 > stop:
                    stop2 = stop
                # Set the proper slice in the main dimension
                slices[maindim] = slice(start2, stop2, step)
                start3 = (start2 - start) // step
                stop3 = start3 + nrowsinbuf
                if stop3 > shape[maindim]:
                    stop3 = shape[maindim]
                # The next line should be generalised if, in the future,
                # maindim is designed to be different from 0 in CArrays.
                # See ticket #199.
                new_carray[start3:stop3] = self.__getitem__(tuple(slices))
        finally:
            # Activate the conversion again (default), also when the copy
            # was interrupted by an exception: otherwise this array would
            # keep returning unconverted data afterwards.
            self._v_convert = True
        # NOTE(review): the byte count is derived from the full source
        # shape, not from the (possibly smaller) copied range.
        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size

        return (new_carray, nbytes)

    _g_copyWithStats = previous_api(_g_copy_with_stats)
Ejemplo n.º 10
0
class Array(hdf5extension.Array, Leaf):
    """This class represents homogeneous datasets in an HDF5 file.

    This class provides methods to write or read data to or from array objects
    in the file. This class allows you neither to enlarge nor to compress
    the datasets on disk; use the EArray class (see :ref:`EArrayClassDescr`) if
    you want enlargeable dataset support or compression features, or CArray
    (see :ref:`CArrayClassDescr`) if you just want compression.

    An interesting property of the Array class is that it remembers the
    *flavor* of the object that has been saved so that if you saved, for
    example, a list, you will get a list during readings afterwards; if you
    saved a NumPy array, you will get a NumPy object, and so forth.

    Note that this class inherits all the public attributes and methods that
    Leaf (see :ref:`LeafClassDescr`) already provides. However, as Array
    instances have no internal I/O buffers, it is not necessary to use the
    flush() method they inherit from Leaf in order to save their internal state
    to disk.  When a writing method call returns, all the data is already on
    disk.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*

    name : str
        The name of this node in its parent group.
    obj
        The array or scalar to be saved.  Accepted types are NumPy
        arrays and scalars as well as native Python sequences and
        scalars, provided that values are regular (i.e. they are not
        like ``[[1,2],2]``) and homogeneous (i.e. all the elements are
        of the same type).

        .. versionchanged:: 3.0
           Renamed from *object* into *obj*.
    title
        A description for this node (it sets the ``TITLE`` HDF5 attribute on
        disk).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or 'big'.
        If this is not specified, the byteorder is that of the given `object`.

    """

    # Class identifier.
    _c_classid = 'ARRAY'

    _c_classId = previous_api_property('_c_classid')
    _v_objectId = previous_api_property('_v_objectid')

    # Lazy read-only attributes
    # `````````````````````````
    @lazyattr
    def dtype(self):
        """The NumPy ``dtype`` that most closely matches this array."""

        # The dtype is simply the one carried by the array's atom.
        atom = self.atom
        return atom.dtype

    # Properties
    # ~~~~~~~~~~
    def _getnrows(self):
        if self.shape == ():
            return SizeType(1)  # scalar case
        else:
            return self.shape[self.maindim]

    nrows = property(_getnrows, None, None, "The number of rows in the array.")

    def _getrowsize(self):
        maindim = self.maindim
        rowsize = self.atom.size
        for i, dim in enumerate(self.shape):
            if i != maindim:
                rowsize *= dim
        return rowsize

    rowsize = property(
        _getrowsize, None, None,
        "The size of the rows in bytes in dimensions orthogonal to *maindim*.")

    size_in_memory = property(
        lambda self: self.nrows * self.rowsize, None, None,
        """The size of this array's data in bytes when it is fully loaded into
        memory.""")

    # Other methods
    # ~~~~~~~~~~~~~
    def __init__(self,
                 parentnode,
                 name,
                 obj=None,
                 title="",
                 byteorder=None,
                 _log=True,
                 _atom=None):
        """Initialise the instance state and hand over to the parent class.

        The node is considered new when `obj` is not None.
        """

        # The node is being created (rather than opened) when there is an
        # object to be stored.
        is_new = obj is not None

        # Documented (*public*) attributes.
        self.atom = _atom
        """An Atom (see :ref:`AtomClassDescr`) instance representing the *type*
        and *shape* of the atomic objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.nrow = None
        """On iterators, this is the index of the current row."""
        self.extdim = -1  # ordinary arrays are not enlargeable
        """The index of the enlargeable dimension."""

        # Private creation state.
        self._v_version = None
        """The object version of this array."""
        self._v_new = is_new
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._obj = obj
        """The object to be stored in the array.  It can be any of numpy,
        list, tuple, string, integer or floating point types, provided
        that they are regular (i.e. they are not like ``[[1, 2], 2]``).

        .. versionchanged:: 3.0
           Renamed from *_object* into *_obj*.

        """
        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""

        # Iteration bookkeeping:
        #   _start, _stop, _step -- range of the current iteration
        #   _nrowsread           -- rows read up to the current state
        #   _startb, _stopb      -- first/stop row of the current buffer
        #   _row                 -- current row in iterators (sentinel)
        self._start = self._stop = self._step = None
        self._nrowsread = None
        self._startb = self._stopb = None
        self._row = None
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        # Ordinary arrays have no filters: leaf is created with default ones.
        super(Array, self).__init__(parentnode, name, is_new, Filters(),
                                    byteorder, _log)

    def _g_create(self):
        """Save a new array in file.

        Converts the stored object to its internal array form, rejects
        void, unicode and object arrays, fixes the byteorder and creates
        the array on disk.  Returns the identifier of the new object.
        """

        self._v_version = obversion
        try:
            # `Leaf._g_post_init_hook()` should be setting the flavor on disk.
            flavor = flavor_of(self._obj)
            self._flavor = flavor
            data = array_as_internal(self._obj, flavor)
        except:  # XXX
            # Problems converting data. Close the node and re-raise exception.
            self.close(flush=0)
            raise

        # Raise an error in case of unsupported object
        # ('V' is void, 'U' is unicode, 'O' is object)
        if data.dtype.kind in ('V', 'U', 'O'):
            raise TypeError("Array objects cannot currently deal with void, "
                            "unicode or object arrays")

        # Decrease the number of references to the object
        self._obj = None

        # Fix the byteorder of data
        data = self._g_fix_byteorder_data(data, data.dtype.byteorder)

        # Create the array on-disk
        try:
            # ``self._v_objectid`` needs to be set because would be
            # needed for setting attributes in some descendants later
            # on
            created = self._create_array(data, self._v_new_title, self.atom)
            self._v_objectid, self.shape, self.atom = created
        except:  # XXX
            # Problems creating the Array on disk. Close node and re-raise.
            self.close(flush=0)
            raise

        # Compute the optimal buffer size
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Arrays don't have chunkshapes (so, set it to None)
        self._v_chunkshape = None

        return self._v_objectid

    def _g_open(self):
        """Get the metadata info for an array in file."""

        (oid, self.atom, self.shape, self._v_chunkshape) = self._open_array()

        self.nrowsinbuf = self._calc_nrowsinbuf()

        return oid

    def get_enum(self):
        """Get the enumerated type associated with this array.

        For an array of an enumerated type, the corresponding Enum instance
        (see :ref:`EnumClassDescr`) is returned.  For any other kind of
        array a TypeError is raised.

        """

        atom = self.atom
        if atom.kind == 'enum':
            return atom.enum

        raise TypeError("array ``%s`` is not of an enumerated type" %
                        self._v_pathname)

    getEnum = previous_api(get_enum)

    def iterrows(self, start=None, stop=None, step=None):
        """Return an iterator over a range of rows of the array.

        Every selected row is produced as an object of the current
        flavor.  The returned rows are taken from the *main dimension*.

        When no range is given, *all the rows* in the array are visited
        (the :meth:`Array.__iter__` special method can be used for the
        same purpose).  Use the start, stop and step parameters to restrict
        the iteration to a given *range of rows*.

        Examples
        --------

        ::

            result = [row for row in arrayInstance.iterrows(step=4)]

        .. versionchanged:: 3.0
           If the *start* parameter is provided and *stop* is None then the
           array is iterated from *start* to the last line.
           In PyTables < 3.0 only one element was returned.

        """

        try:
            bounds = self._process_range(start, stop, step)
            self._start, self._stop, self._step = bounds
        except IndexError:
            # If problems with indexes, silently return the null tuple
            return ()

        self._init_loop()
        # The array object itself acts as the iterator.
        return self

    def __iter__(self):
        """Iterate over the rows of the array.

        This is equivalent to calling :meth:`Array.iterrows` with default
        arguments, i.e. it iterates over *all the rows* in the array.

        Examples
        --------

        ::

            result = [row[2] for row in array]

        Which is equivalent to::

            result = [row[2] for row in array.iterrows()]

        """

        if not self._init:
            # If the iterator is called directly, assign default variables
            self._start = 0
            self._stop = self.nrows
            self._step = 1
            # and initialize the loop
            self._init_loop()
        return self

    def _init_loop(self):
        """Reset the iteration state used by `next()`.

        Relies on `_start` and `_step` having been set by the caller.
        """

        first_row = self._start
        self._nrowsread = first_row
        self._startb = first_row
        # ``_row`` < 0 makes `next()` read a fresh buffer on its first call.
        self._row = -1
        # Marks that a loop has been set up, so `__iter__()` keeps it.
        self._init = True
        # Row number; `next()` adds one step before returning each element.
        self.nrow = SizeType(self._start - self._step)

    _initLoop = previous_api(_init_loop)

    def next(self):
        """Return the following element of the current iteration.

        The element is an object of the current flavor.  StopIteration is
        raised (and the iteration state is cleared) once the stop row has
        been reached.

        """

        # NOTE: long iterations could probably be sped up by reusing the
        # `listarr` buffer.
        if self._nrowsread >= self._stop:
            self._init = False
            self.listarr = None  # fixes issue #308
            raise StopIteration  # end of iteration

        buffer_exhausted = self._row < 0 or self._row + 1 >= self.nrowsinbuf
        if buffer_exhausted:
            # Read the next chunk of rows, never going past the stop row.
            chunk_end = self._startb + self._step * self.nrowsinbuf
            if chunk_end > self._stop:
                chunk_end = self._stop
            self._stopb = chunk_end
            chunk = self._read(self._startb, self._stopb, self._step)
            # Bring the extendable dimension first to ease returning rows.
            if self.extdim > 0:
                chunk = chunk.swapaxes(self.extdim, 0)
            self.listarr = internal_to_flavor(chunk, self.flavor)
            self._row = -1
            self._startb = self._stopb

        self._row += 1
        self.nrow += self._step
        self._nrowsread += self._step
        # Fixes bug #968132: decide on the array shape, not the buffer shape.
        if not self.shape:
            return self.listarr  # Scalar case
        return self.listarr[self._row]

    def _interpret_indexing(self, keys):
        """Translate a basic index into per-dimension range arrays.

        Internal routine used by __getitem__ and __setitem__.

        `keys` may be an integer index, a slice, an Ellipsis or a tuple of
        those.  The result is the tuple ``(startl, stopl, stepl, shape)``:
        the first three are arrays holding one start/stop/step value per
        dimension of the array, and `shape` is the list of lengths of the
        selection, where dimensions addressed with a plain integer index
        are left out.

        An IndexError is raised when there are more keys than dimensions or
        when an integer index is out of range (in either direction).  A
        TypeError is raised for any other kind of key; callers use it to
        fall back to other selection modes.

        """

        maxlen = len(self.shape)
        shape = (maxlen, )
        startl = numpy.empty(shape=shape, dtype=SizeType)
        stopl = numpy.empty(shape=shape, dtype=SizeType)
        stepl = numpy.empty(shape=shape, dtype=SizeType)
        # Flags the dimensions that were addressed with an integer index.
        stop_None = numpy.zeros(shape=shape, dtype=SizeType)
        if not isinstance(keys, tuple):
            keys = (keys, )
        nkeys = len(keys)
        dim = 0
        # Here is some problem when dealing with [...,...] params
        # but this is a bit weird way to pass parameters anyway
        for key in keys:
            ellipsis = 0  # Sentinel
            if isinstance(key, type(Ellipsis)):
                ellipsis = 1
                # Fill as many full-range dimensions as the remaining keys
                # leave free.
                for diml in xrange(dim, len(self.shape) - (nkeys - dim) + 1):
                    startl[dim] = 0
                    stopl[dim] = self.shape[diml]
                    stepl[dim] = 1
                    dim += 1
            elif dim >= maxlen:
                raise IndexError("Too many indices for object '%s'" %
                                 self._v_pathname)
            elif is_idx(key):
                # Protection for index out of range
                if key >= self.shape[dim]:
                    raise IndexError("Index out of range")
                if key < 0:
                    # To support negative values (Fixes bug #968149)
                    key += self.shape[dim]
                    # An index below ``-shape[dim]`` is still negative here.
                    # Reject it explicitly rather than passing a negative
                    # bound on to `_process_range()`, whose handling of it
                    # is not defined at this level.
                    if key < 0:
                        raise IndexError("Index out of range")
                start, stop, step = self._process_range(key,
                                                        key + 1,
                                                        1,
                                                        dim=dim)
                stop_None[dim] = 1
            elif isinstance(key, slice):
                start, stop, step = self._process_range(key.start,
                                                        key.stop,
                                                        key.step,
                                                        dim=dim)
            else:
                raise TypeError("Non-valid index or slice: %s" % key)
            if not ellipsis:
                startl[dim] = start
                stopl[dim] = stop
                stepl[dim] = step
                dim += 1

        # Complete the other dimensions, if needed
        if dim < len(self.shape):
            for diml in xrange(dim, len(self.shape)):
                startl[dim] = 0
                stopl[dim] = self.shape[diml]
                stepl[dim] = 1
                dim += 1

        # Compute the shape for the container properly. Fixes #1288792
        shape = []
        for dim in xrange(len(self.shape)):
            # The negative division operates differently with python scalars
            # and numpy scalars (which are similar to C conventions). See:
            # http://www.python.org/doc/faq/programming.html#why-does-22-10-return-3
            # and
            # http://www.peterbe.com/Integer-division-in-programming-languages
            # for more info on this issue.
            # I've finally decided to rely on the len(xrange) function.
            # F. Alted 2006-09-25
            # Switch to `lrange` to allow long ranges (see #99).
            # use xrange, since it supports large integers as of Python 2.6
            # see github #181
            new_dim = len(xrange(startl[dim], stopl[dim], stepl[dim]))
            # Dimensions selected with an integer index are dropped.
            if not (new_dim == 1 and stop_None[dim]):
                shape.append(new_dim)

        return startl, stopl, stepl, shape

    def _fancy_selection(self, args):
        """Performs a NumPy-style fancy selection in `self`.

        Implements advanced NumPy-style selection operations in
        addition to the standard slice-and-int behavior.

        Indexing arguments may be ints, slices or lists of indices.

        Note: This is a backport from the h5py project.

        """

        # Internal functions

        def validate_number(num, length):
            """Validate a list member for the given axis length."""

            try:
                num = long(num)
            except TypeError:
                raise TypeError("Illegal index: %r" % num)
            if num > length - 1:
                raise IndexError("Index out of bounds: %d" % num)

        def expand_ellipsis(args, rank):
            """Expand ellipsis objects and fill in missing axes."""

            n_el = sum(1 for arg in args if arg is Ellipsis)
            if n_el > 1:
                raise IndexError("Only one ellipsis may be used.")
            elif n_el == 0 and len(args) != rank:
                args = args + (Ellipsis, )

            final_args = []
            n_args = len(args)
            for idx, arg in enumerate(args):
                if arg is Ellipsis:
                    final_args.extend((slice(None), ) * (rank - n_args + 1))
                else:
                    final_args.append(arg)

            if len(final_args) > rank:
                raise IndexError("Too many indices.")

            return final_args

        def translate_slice(exp, length):
            """Given a slice object, return a 3-tuple (start, count, step)

            This is for for use with the hyperslab selection routines.

            """

            start, stop, step = exp.start, exp.stop, exp.step
            if start is None:
                start = 0
            else:
                start = long(start)
            if stop is None:
                stop = length
            else:
                stop = long(stop)
            if step is None:
                step = 1
            else:
                step = long(step)

            if step < 1:
                raise IndexError("Step must be >= 1 (got %d)" % step)
            if stop == start:
                raise IndexError("Zero-length selections are not allowed")
            if stop < start:
                raise IndexError("Reverse-order selections are not allowed")
            if start < 0:
                start = length + start
            if stop < 0:
                stop = length + stop

            if not 0 <= start <= (length - 1):
                raise IndexError("Start index %s out of range (0-%d)" %
                                 (start, length - 1))
            if not 1 <= stop <= length:
                raise IndexError("Stop index %s out of range (1-%d)" %
                                 (stop, length))

            count = (stop - start) // step
            if (stop - start) % step != 0:
                count += 1

            if start + count > length:
                raise IndexError("Selection out of bounds (%d; axis has %d)" %
                                 (start + count, length))

            return start, count, step

        # Main code for _fancy_selection
        mshape = []
        selection = []

        if not isinstance(args, tuple):
            args = (args, )

        args = expand_ellipsis(args, len(self.shape))

        list_seen = False
        reorder = None
        for idx, (exp, length) in enumerate(zip(args, self.shape)):
            if isinstance(exp, slice):
                start, count, step = translate_slice(exp, length)
                selection.append((start, count, step, idx, "AND"))
                mshape.append(count)
            else:
                try:
                    exp = list(exp)
                except TypeError:
                    exp = [exp]  # Handle scalar index as a list of length 1
                    mshape.append(0)  # Keep track of scalar index for NumPy
                else:
                    mshape.append(len(exp))
                if len(exp) == 0:
                    raise IndexError(
                        "Empty selections are not allowed (axis %d)" % idx)
                elif len(exp) > 1:
                    if list_seen:
                        raise IndexError("Only one selection list is allowed")
                    else:
                        list_seen = True
                else:
                    if (not isinstance(exp[0], (int, long, numpy.integer)) or
                        (isinstance(exp[0], numpy.ndarray) and
                         not numpy.issubdtype(exp[0].dtype, numpy.integer))):
                        raise TypeError("Only integer coordinates allowed.")

                nexp = numpy.asarray(exp, dtype="i8")
                # Convert negative values
                nexp = numpy.where(nexp < 0, length + nexp, nexp)
                # Check whether the list is ordered or not
                # (only one unordered list is allowed)
                if not len(nexp) == len(numpy.unique(nexp)):
                    raise IndexError(
                        "Selection lists cannot have repeated values")
                neworder = nexp.argsort()
                if (neworder.shape != (len(exp), ) or numpy.sum(
                        numpy.abs(neworder - numpy.arange(len(exp)))) != 0):
                    if reorder is not None:
                        raise IndexError(
                            "Only one selection list can be unordered")
                    corrected_idx = sum(1 for x in mshape if x != 0) - 1
                    reorder = (corrected_idx, neworder)
                    nexp = nexp[neworder]
                for select_idx in xrange(len(nexp) + 1):
                    # This crazy piece of code performs a list selection
                    # using HDF5 hyperslabs.
                    # For each index, perform a "NOTB" selection on every
                    # portion of *this axis* which falls *outside* the list
                    # selection.  For this to work, the input array MUST be
                    # monotonically increasing.
                    if select_idx < len(nexp):
                        validate_number(nexp[select_idx], length)
                    if select_idx == 0:
                        start = 0
                        count = nexp[0]
                    elif select_idx == len(nexp):
                        start = nexp[-1] + 1
                        count = length - start
                    else:
                        start = nexp[select_idx - 1] + 1
                        count = nexp[select_idx] - start
                    if count > 0:
                        selection.append((start, count, 1, idx, "NOTB"))

        mshape = tuple(x for x in mshape if x != 0)
        return selection, reorder, mshape

    _fancySelection = previous_api(_fancy_selection)

    def __getitem__(self, key):
        """Get a row, a range of rows or a slice from the array.

        The set of tokens allowed for the key is the same as that for extended
        slicing in Python (including the Ellipsis or ... token).  The result is
        an object of the current flavor; its shape depends on the kind of slice
        used as key and the shape of the array itself.

        Furthermore, NumPy-style fancy indexing, where a list of indices in a
        certain axis is specified, is also supported.  Note that only one list
        per selection is supported right now.  Finally, NumPy-style point and
        boolean selections are supported as well.

        Examples
        --------

        ::

            array1 = array[4]                       # simple selection
            array2 = array[4:1000:2]                # slice selection
            array3 = array[1, ..., ::2, 1:4, 4:]    # general slice selection
            array4 = array[1, [1,5,10], ..., -1]    # fancy selection
            array5 = array[np.where(array[:] > 4)]  # point selection
            array6 = array[array[:] > 4]            # boolean selection

        """

        self._g_check_open()

        try:
            # First, try with a regular selection
            startl, stopl, stepl, shape = self._interpret_indexing(key)
            arr = self._read_slice(startl, stopl, stepl, shape)
        except TypeError:
            # Then, try with a point-wise selection
            try:
                coords = self._point_selection(key)
                arr = self._read_coords(coords)
            except TypeError:
                # Finally, try with a fancy selection
                selection, reorder, shape = self._fancy_selection(key)
                arr = self._read_selection(selection, reorder, shape)

        if self.flavor == "numpy" or not self._v_convert:
            return arr

        return internal_to_flavor(arr, self.flavor)

    def __setitem__(self, key, value):
        """Set a row, a range of rows or a slice in the array.

        It takes different actions depending on the type of the key parameter:
        if it is an integer, the corresponding array row is set to value (the
        value is broadcast when needed).  If key is a slice, the row slice
        determined by it is set to value (as usual, if the slice to be updated
        exceeds the actual shape of the array, only the values in the existing
        range are updated).

        If value is a multidimensional object, then its shape must be
        compatible with the shape determined by key, otherwise, a ValueError
        will be raised.

        Furthermore, NumPy-style fancy indexing, where a list of indices in a
        certain axis is specified, is also supported.  Note that only one list
        per selection is supported right now.  Finally, NumPy-style point and
        boolean selections are supported as well.

        Nothing is written when the converted value has no elements.

        Examples
        --------

        ::

            a1[0] = 333        # assign an integer to a Integer Array row
            a2[0] = 'b'        # assign a string to a string Array row
            a3[1:4] = 5        # broadcast 5 to slice 1:4
            a4[1:4:2] = 'xXx'  # broadcast 'xXx' to slice 1:4:2

            # General slice update (a5.shape = (4,3,2,8,5,10)).
            a5[1, ..., ::2, 1:4, 4:] = numpy.arange(432).reshape(
                (3, 2, 4, 3, 6))
            a6[1, [1,5,10], ..., -1] = arr    # fancy selection
            a7[np.where(a6[:] > 4)] = 4       # point selection + broadcast
            a8[arr > 4] = arr2                # boolean selection

        """

        self._g_check_open()

        # Create an array compliant with the specified slice
        nparr = convert_to_np_atom2(value, self.atom)
        if nparr.size == 0:
            return

        # truncate data if least_significant_digit filter is set
        # TODO: add the least_significant_digit attribute to the array on disk
        # Integer data of any width or signedness is left alone.  The test is
        # made against the abstract ``numpy.integer`` type because passing the
        # Python ``int`` type to ``issubdtype`` only matches NumPy's default
        # integer on current NumPy releases.
        if (self.filters.least_significant_digit is not None
                and not numpy.issubdtype(nparr.dtype, numpy.integer)):
            nparr = quantize(nparr, self.filters.least_significant_digit)

        # Each selection mode rejects keys it does not understand with a
        # TypeError, so they are tried in turn.
        try:
            # First, try with a regular selection
            startl, stopl, stepl, shape = self._interpret_indexing(key)
            self._write_slice(startl, stopl, stepl, shape, nparr)
        except TypeError:
            # Then, try with a point-wise selection
            try:
                coords = self._point_selection(key)
                self._write_coords(coords, nparr)
            except TypeError:
                # Finally, try with a fancy selection
                selection, reorder, shape = self._fancy_selection(key)
                self._write_selection(selection, reorder, shape, nparr)

    def _check_shape(self, nparr, slice_shape):
        """Test that nparr shape is consistent with underlying object.

        If not, try creating a new nparr object, using broadcasting if
        necessary.

        """

        if nparr.shape != (slice_shape + self.atom.dtype.shape):
            # Create an array compliant with the specified shape
            narr = numpy.empty(shape=slice_shape, dtype=self.atom.dtype)

            # Assign the value to it. It will raise a ValueError exception
            # if the objects cannot be broadcast to a single shape.
            narr[...] = nparr
            return narr
        else:
            return nparr

    _checkShape = previous_api(_check_shape)

    def _read_slice(self, startl, stopl, stepl, shape):
        """Read a slice based on `startl`, `stopl` and `stepl`."""

        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._g_read_slice(startl, stopl, stepl, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    _readSlice = previous_api(_read_slice)

    def _read_coords(self, coords):
        """Read a set of points defined by `coords`."""

        nparr = numpy.empty(dtype=self.atom.dtype, shape=len(coords))
        if len(coords) > 0:
            self._g_read_coords(coords, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    _readCoords = previous_api(_read_coords)

    def _read_selection(self, selection, reorder, shape):
        """Read a `selection`.

        Reorder if necessary.

        """

        # Create the container for the slice
        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        # Arrays that have non-zero dimensionality
        self._g_read_selection(selection, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        elif reorder is not None:
            # We need to reorder the array
            idx, neworder = reorder
            k = [slice(None)] * len(shape)
            k[idx] = neworder.argsort()
            # Apparently, a copy is not needed here, but doing it
            # for symmetry with the `_write_selection()` method.
            nparr = nparr[k].copy()
        return nparr

    _readSelection = previous_api(_read_selection)

    def _write_slice(self, startl, stopl, stepl, shape, nparr):
        """Write `nparr` in a slice based on `startl`, `stopl` and `stepl`."""

        nparr = self._check_shape(nparr, tuple(shape))
        countl = ((stopl - startl - 1) // stepl) + 1
        self._g_write_slice(startl, stepl, countl, nparr)

    _writeSlice = previous_api(_write_slice)

    def _write_coords(self, coords, nparr):
        """Write `nparr` values in points defined by `coords` coordinates."""

        if len(coords) > 0:
            nparr = self._check_shape(nparr, (len(coords), ))
            self._g_write_coords(coords, nparr)

    _writeCoords = previous_api(_write_coords)

    def _write_selection(self, selection, reorder, shape, nparr):
        """Write `nparr` in `selection`.

        Reorder if necessary.

        """

        nparr = self._check_shape(nparr, tuple(shape))
        # Check whether we should reorder the array
        if reorder is not None:
            idx, neworder = reorder
            k = [slice(None)] * len(shape)
            k[idx] = neworder
            # For a reason a don't understand well, we need a copy of
            # the reordered array
            nparr = nparr[k].copy()
        self._g_write_selection(selection, nparr)

    _writeSelection = previous_api(_write_selection)

    def _read(self, start, stop, step, out=None):
        """Read the array from disk without slice or flavor processing."""

        nrowstoread = len(xrange(0, stop - start, step))
        shape = list(self.shape)
        if shape:
            shape[self.maindim] = nrowstoread
        if out is None:
            arr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        else:
            bytes_required = self.rowsize * nrowstoread
            # if buffer is too small, it will segfault
            if bytes_required != out.nbytes:
                raise ValueError(
                    ('output array size invalid, got {0} bytes, '
                     'need {1} bytes').format(out.nbytes, bytes_required))
            if not out.flags['C_CONTIGUOUS']:
                raise ValueError('output array not C contiguous')
            arr = out
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._read_array(start, stop, step, arr)
        # data is always read in the system byteorder
        # if the out array's byteorder is different, do a byteswap
        if (out is not None
                and byteorders[arr.dtype.byteorder] != sys.byteorder):
            arr.byteswap(True)
        return arr

    def read(self, start=None, stop=None, step=None, out=None):
        """Get data in the array as an object of the current flavor.

        Use start, stop and step to select only a *range of rows* in the
        array.  They mean the same as in the built-in range() Python
        function, except that negative values of step are not allowed yet.
        Moreover, if only start is specified, then stop will be set to
        start + 1.  If neither start nor stop is specified, then *all the
        rows* in the array are selected.

        A NumPy array may be passed as out to receive the output data.  It
        must have the same size as the data selected with the other
        parameters.  Its datatype is not checked and no type casting is
        performed, so if it does not match the datatype on disk, the output
        will not be correct.  This parameter is only valid when the array's
        flavor is set to 'numpy'; otherwise, a TypeError is raised.

        Data read from disk in NumPy format comes in the current system's
        byteorder, regardless of how it is stored on disk.  The exception is
        when an output buffer is supplied, in which case the output will be
        in the byteorder of that output buffer.

        .. versionchanged:: 3.0
           Added the *out* parameter.

        """

        self._g_check_open()

        wants_buffer = out is not None
        if wants_buffer and self.flavor != 'numpy':
            raise TypeError(
                ("Optional 'out' argument may only be supplied if array "
                 "flavor is 'numpy', currently is {0}").format(self.flavor))

        start, stop, step = self._process_range_read(start, stop, step)
        raw = self._read(start, stop, step, out)
        return internal_to_flavor(raw, self.flavor)

    def _g_copy_with_stats(self, group, name, start, stop, step, title,
                           filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Creates a new `Array` called `name` under `group` with the selected
        rows and returns it together with a byte count computed from the
        shape and atom size of this array.

        """

        # Compute the correct indices.
        start, stop, step = self._process_range_read(start, stop, step)
        # Get the slice of the array (non-buffered version); a scalar array
        # can only be read as a whole.
        data = self[start:stop:step] if self.shape else self[()]
        # Build the new Array object.  Use the _atom reserved keyword
        # just in case the array is being copied from a native HDF5
        # with atomic types different from scalars.
        # For details, see #275 of trac.
        new_array = Array(group, name, data,
                          title=title, _log=_log, _atom=self.atom)
        nelements = numpy.prod(self.shape, dtype=SizeType)
        nbytes = nelements * self.atom.size

        return (new_array, nbytes)

    _g_copyWithStats = previous_api(_g_copy_with_stats)

    def __repr__(self):
        """This provides more metainfo in addition to standard __str__"""

        return """%s
  atom := %r
  maindim := %r
  flavor := %r
  byteorder := %r
  chunkshape := %r""" % (self, self.atom, self.maindim, self.flavor,
                         self.byteorder, self.chunkshape)
Ejemplo n.º 11
0
class Node(object):
    """Abstract base class for all PyTables nodes.

    This is the base class for *all* nodes in a PyTables hierarchy. It is an
    abstract class, i.e. it may not be directly instantiated; however, every
    node in the hierarchy is an instance of this class.

    A PyTables node is always hosted in a PyTables *file*, under a *parent
    group*, at a certain *depth* in the node hierarchy. A node knows its own
    *name* in the parent group and its own *path name* in the file.

    All the previous information is location-dependent, i.e. it may change when
    moving or renaming a node in the hierarchy. A node also has
    location-independent information, such as its *HDF5 object identifier* and
    its *attribute set*.

    This class gathers the operations and attributes (both location-dependent
    and independent) which are common to all PyTables nodes, whatever their
    type is. Nonetheless, due to natural naming restrictions, the names of all
    of these members start with a reserved prefix (see the Group class
    in :ref:`GroupClassDescr`).

    Sub-classes with no children (e.g. *leaf nodes*) may define new methods,
    attributes and properties to avoid natural naming restrictions. For
    instance, _v_attrs may be shortened to attrs and _f_rename to
    rename. However, the original methods and attributes should still be
    available.

    .. rubric:: Node attributes

    .. attribute:: _v_depth

        The depth of this node in the tree (a non-negative integer value).

    .. attribute:: _v_file

        The hosting File instance (see :ref:`FileClassDescr`).

    .. attribute:: _v_name

        The name of this node in its parent group (a string).

    .. attribute:: _v_pathname

        The path of this node in the tree (a string).

    .. attribute:: _v_objectid

        A node identifier (may change from run to run).

        .. versionchanged:: 3.0
           The *_v_objectID* attribute has been renamed into *_v_objectid*.

    """

    # This makes this class and all derived subclasses be handled by MetaNode.
    __metaclass__ = MetaNode

    # By default, attributes accept Undo/Redo.
    _AttributeSet = AttributeSet

    # `_v_parent` is accessed via its file to avoid upwards references.
    def _g_getparent(self):
        """Return the node found at the parent path of this node."""
        parentpath, _ = split_path(self._v_pathname)
        hosting_file = self._v_file
        return hosting_file._get_node(parentpath)

    _v_parent = property(_g_getparent, None, None,
                         ("The parent :class:`Group` instance"))

    # '_v_attrs' is defined as a lazy read-only attribute.
    # This saves 0.7s/3.8s.
    @lazyattr
    def _v_attrs(self):
        """The associated `AttributeSet` instance.

        It is built from the class stored in ``_AttributeSet``.

        See Also
        --------
        tables.attributeset.AttributeSet : container for the HDF5 attributes

        """

        attrset_class = self._AttributeSet
        return attrset_class(self)

    # '_v_title' is a direct read-write shorthand for the 'TITLE' attribute
    # with the empty string as a default value.
    def _g_gettitle(self):
        if hasattr(self._v_attrs, 'TITLE'):
            return self._v_attrs.TITLE
        else:
            return ''

    def _g_settitle(self, title):
        self._v_attrs.TITLE = title

    _v_title = property(_g_gettitle, _g_settitle, None,
                        ("A description of this node. A shorthand for "
                         "TITLE attribute."))

    # This may be looked up by ``__del__`` when ``__init__`` doesn't get
    # to be called.  See ticket #144 for more info.
    _v_isopen = False
    """Whehter this node is open or not."""

    _v_objectId = previous_api_property('_v_objectid')
    _v_maxTreeDepth = previous_api_property('_v_maxtreedepth')

    # The ``_log`` argument is only meant to be used by ``_g_copy_as_child()``
    # to avoid logging the creation of children nodes of a copied sub-tree.
    def __init__(self, parentnode, name, _log=True):
        """Create a new node or open an existing one under `parentnode`.

        Whether the node is created or opened is decided by ``self._v_new``,
        which must have been set before this constructor runs.

        `parentnode` is the node that will host this one (a soft link is
        dereferenced first) and `name` is the name of this node in it.
        `_log` tells whether the creation of a new node should be logged
        when undo is enabled in the file.

        If anything fails after the location has been set, the node is
        closed and the exception is re-raised.

        """

        # Remember to assign these values in the root group constructor
        # as it does not use this method implementation!

        # if the parent node is a softlink, dereference it
        if isinstance(parentnode, class_name_dict['SoftLink']):
            parentnode = parentnode.dereference()

        self._v_file = None
        """The hosting File instance (see :ref:`FileClassDescr`)."""

        self._v_isopen = False
        """Whether this node is open or not."""

        self._v_pathname = None
        """The path of this node in the tree (a string)."""

        self._v_name = None
        """The name of this node in its parent group (a string)."""

        self._v_depth = None
        """The depth of this node in the tree (an non-negative integer value).
        """

        self._v_maxtreedepth = parentnode._v_file.params['MAX_TREE_DEPTH']
        """Maximum tree depth before warning the user.

        .. versionchanged:: 3.0
           Renamed into *_v_maxtreedepth* from *_v_maxTreeDepth*.

        """

        self._v__deleting = False
        """Is the node being deleted?"""

        self._v_objectid = None
        """A node identifier (may change from run to run).

        .. versionchanged:: 3.0
           The *_v_objectID* attribute has been renamed into *_v_objectid*.

        """

        # The name is only validated when the node is a new one.
        validate = new = self._v_new  # set by subclass constructor

        # Is the parent node a group?  Is it open?
        self._g_check_group(parentnode)
        parentnode._g_check_open()
        file_ = parentnode._v_file

        # Will the file be able to host a new node?
        if new:
            file_._check_writable()

        # Bind to the parent node and set location-dependent information.
        if new:
            # Only new nodes need to be referenced.
            # Opened nodes are already known by their parent group.
            parentnode._g_refnode(self, name, validate)
        self._g_set_location(parentnode, name)

        try:
            # hdf5extension operations:
            #   Update node attributes.
            self._g_new(parentnode, name, init=True)
            #   Create or open the node and get its object ID.
            if new:
                self._v_objectid = self._g_create()
            else:
                self._v_objectid = self._g_open()

            # The node *has* been created, log that.
            if new and _log and file_.is_undo_enabled():
                self._g_log_create()

            # This allows extra operations after creating the node.
            self._g_post_init_hook()
        except:
            # If anything happens, the node must be closed
            # to undo every possible registration made so far.
            # We do *not* rely on ``__del__()`` doing it later,
            # since it might never be called anyway.
            # The bare ``except`` is deliberate: the exception is always
            # re-raised once the node has been closed.
            self._f_close()
            raise

    def _g_log_create(self):
        self._v_file._log('CREATE', self._v_pathname)

    _g_logCreate = previous_api(_g_log_create)

    def __del__(self):
        # Closed `Node` instances can not be killed and revived.
        # Instead, accessing a closed and deleted (from memory, not
        # disk) one yields a *new*, open `Node` instance.  This is
        # because of two reasons:
        #
        # 1. Predictability.  After closing a `Node` and deleting it,
        #    only one thing can happen when accessing it again: a new,
        #    open `Node` instance is returned.  If closed nodes could be
        #    revived, one could get either a closed or an open `Node`.
        #
        # 2. Ease of use.  If the user wants to access a closed node
        #    again, the only condition would be that no references to
        #    the `Node` instance were left.  If closed nodes could be
        #    revived, the user would also need to force the closed
        #    `Node` out of memory, which is not a trivial task.
        #

        if not self._v_isopen:
            return  # the node is already closed or not initialized

        self._v__deleting = True

        # If we get here, the `Node` is still open.
        try:
            node_manager = self._v_file._node_manager
            node_manager.drop_node(self, check_unregistered=False)
        finally:
            # At this point the node can still be open if there is still some
            # alive reference around (e.g. if the __del__ method is called
            # explicitly by the user).
            if self._v_isopen:
                self._v__deleting = True
                self._f_close()

    def _g_pre_kill_hook(self):
        """Code to be called before killing the node."""
        pass

    _g_preKillHook = previous_api(_g_pre_kill_hook)

    def _g_create(self):
        """Create a new HDF5 node and return its object identifier."""
        raise NotImplementedError

    def _g_open(self):
        """Open an existing HDF5 node and return its object identifier."""
        raise NotImplementedError

    def _g_check_open(self):
        """Check that the node is open.

        If the node is closed, a `ClosedNodeError` is raised.

        """

        if not self._v_isopen:
            raise ClosedNodeError("the node object is closed")
        assert self._v_file.isopen, "found an open node in a closed file"

    _g_checkOpen = previous_api(_g_check_open)

    def _g_set_location(self, parentnode, name):
        """Set location-dependent attributes.

        Sets the location-dependent attributes of this node to reflect
        that it is placed under the specified `parentnode`, with the
        specified `name`.

        This also triggers the insertion of file references to this
        node.  If the maximum recommended tree depth is exceeded, a
        `PerformanceWarning` is issued.

        """

        file_ = parentnode._v_file
        parentdepth = parentnode._v_depth

        self._v_file = file_
        self._v_isopen = True

        root_uep = file_.root_uep
        if name.startswith(root_uep):
            # This has been called from File._get_node()
            assert parentdepth == 0
            if root_uep == "/":
                self._v_pathname = name
            else:
                self._v_pathname = name[len(root_uep):]
            _, self._v_name = split_path(name)
            self._v_depth = name.count("/") - root_uep.count("/") + 1
        else:
            # If we enter here is because this has been called elsewhere
            self._v_name = name
            self._v_pathname = join_path(parentnode._v_pathname, name)
            self._v_depth = parentdepth + 1

        # Check if the node is too deep in the tree.
        if parentdepth >= self._v_maxtreedepth:
            warnings.warn(
                """\
node ``%s`` is exceeding the recommended maximum depth (%d);\
be ready to see PyTables asking for *lots* of memory and possibly slow I/O""" %
                (self._v_pathname, self._v_maxtreedepth), PerformanceWarning)

        if self._v_pathname != '/':
            file_._node_manager.cache_node(self, self._v_pathname)

    _g_setLocation = previous_api(_g_set_location)

    def _g_update_location(self, newparentpath):
        """Update location-dependent attributes.

        Updates location data when an ancestor node has changed its
        location in the hierarchy to `newparentpath`.  In fact, this
        method is expected to be called by an ancestor of this node.

        This also triggers the update of file references to this node.
        If the maximum recommended node depth is exceeded, a
        `PerformanceWarning` is issued.  This warning is assured to be
        unique.

        """

        oldpath = self._v_pathname
        newpath = join_path(newparentpath, self._v_name)
        newdepth = newpath.count('/')

        self._v_pathname = newpath
        self._v_depth = newdepth

        # Check if the node is too deep in the tree.
        if newdepth > self._v_maxtreedepth:
            warnings.warn(
                """\
moved descendent node is exceeding the recommended maximum depth (%d);\
be ready to see PyTables asking for *lots* of memory and possibly slow I/O""" %
                (self._v_maxtreedepth, ), PerformanceWarning)

        node_manager = self._v_file._node_manager
        node_manager.rename_node(oldpath, newpath)

        # Tell dependent objects about the new location of this node.
        self._g_update_dependent()

    _g_updateLocation = previous_api(_g_update_location)

    def _g_del_location(self):
        """Clear location-dependent attributes.

        This also triggers the removal of file references to this node.

        """

        node_manager = self._v_file._node_manager
        pathname = self._v_pathname

        if not self._v__deleting:
            node_manager.drop_from_cache(pathname)
            # Note: node_manager.drop_node do not removes the node form the
            # registry if it is still open
            node_manager.registry.pop(pathname, None)

        self._v_file = None
        self._v_isopen = False
        self._v_pathname = None
        self._v_name = None
        self._v_depth = None

    _g_delLocation = previous_api(_g_del_location)

    def _g_post_init_hook(self):
        """Code to be run after node creation and before creation logging."""
        pass

    _g_postInitHook = previous_api(_g_post_init_hook)

    def _g_update_dependent(self):
        """Update dependent objects after a location change.

        All dependent objects (but not nodes!) referencing this node
        must be updated here.

        """

        if '_v_attrs' in self.__dict__:
            self._v_attrs._g_update_node_location(self)

    _g_updateDependent = previous_api(_g_update_dependent)

    def _f_close(self):
        """Close this node in the tree.

        This releases all resources held by the node, so it should not
        be used again.  On nodes with data, it may be flushed to disk.

        You should not need to close nodes manually because they are
        automatically opened/closed when they are loaded/evicted from
        the integrated LRU cache.

        """

        # After calling ``_f_close()``, two conditions are met:
        #
        #   1. The node object is detached from the tree.
        #   2. *Every* attribute of the node is removed.
        #
        # Thus, cleanup operations used in ``_f_close()`` in sub-classes
        # must be run *before* calling the method in the superclass.

        if not self._v_isopen:
            return  # the node is already closed

        myDict = self.__dict__

        # Close the associated `AttributeSet`
        # only if it has already been placed in the object's dictionary.
        if '_v_attrs' in myDict:
            self._v_attrs._g_close()

        # Detach the node from the tree if necessary.
        self._g_del_location()

        # Finally, clear all remaining attributes from the object.
        myDict.clear()

        # Just add a final flag to signal that the node is closed:
        self._v_isopen = False

    def _g_remove(self, recursive, force):
        """Remove this node from the hierarchy.

        If the node has children, recursive removal must be stated by
        giving `recursive` a true value; otherwise, a `NodeError` will
        be raised.

        If `force` is set to true, the node will be removed no matter it
        has children or not (useful for deleting hard links).

        It does not log the change.

        """

        # Remove the node from the PyTables hierarchy.
        parent = self._v_parent
        parent._g_unrefnode(self._v_name)
        # Close the node itself.
        self._f_close()
        # hdf5extension operations:
        # Remove the node from the HDF5 hierarchy.
        self._g_delete(parent)

    def _f_remove(self, recursive=False, force=False):
        """Remove this node from the hierarchy.

        If the node has children, recursive removal must be stated by giving
        recursive a true value; otherwise, a NodeError will be raised.

        If the node is a link to a Group object, and you are sure that you want
        to delete it, you can do this by setting the force flag to true.

        """

        self._g_check_open()
        file_ = self._v_file
        file_._check_writable()

        if file_.is_undo_enabled():
            self._g_remove_and_log(recursive, force)
        else:
            self._g_remove(recursive, force)

    def _g_remove_and_log(self, recursive, force):
        """Log the removal of this node and move it to its shadow.

        A ``'REMOVE'`` entry for the path of the node is logged and the
        path is then passed to ``move_to_shadow()``.  The `recursive`
        and `force` arguments are not inspected here.

        """

        hosting_file = self._v_file
        doomed_path = self._v_pathname
        # The entry must be logged *before* moving, so that the right
        # shadow name is used.
        hosting_file._log('REMOVE', doomed_path)
        move_to_shadow(hosting_file, doomed_path)

    _g_removeAndLog = previous_api(_g_remove_and_log)

    def _g_move(self, newparent, newname):
        """Move this node in the hierarchy.

        Moves the node into the given `newparent`, with the given
        `newname`.  The node is referenced in `newparent`, dereferenced
        from its old parent, given new location information and then
        moved at the HDF5 level; dependent objects are notified last.

        It does not log the change.

        """

        # The old location must be saved now: ``_g_del_location()`` below
        # resets ``_v_name`` and ``_v_pathname`` to ``None``.
        oldparent = self._v_parent
        oldname = self._v_name
        oldpathname = self._v_pathname  # to move the HDF5 node

        # Try to insert the node into the new parent.
        newparent._g_refnode(self, newname)
        # Remove the node from the old parent.
        oldparent._g_unrefnode(oldname)

        # Remove location information for this node.
        self._g_del_location()
        # Set new location information for this node.
        self._g_set_location(newparent, newname)

        # hdf5extension operations:
        #   Update node attributes.
        self._g_new(newparent, self._v_name, init=False)
        #   Move the node (both old and new object IDs, names and
        #   path names are passed).
        self._v_parent._g_move_node(oldparent._v_objectid, oldname,
                                    newparent._v_objectid, newname,
                                    oldpathname, self._v_pathname)

        # Tell dependent objects about the new location of this node.
        self._g_update_dependent()

    def _f_rename(self, newname, overwrite=False):
        """Rename this node in place.

        Changes the name of a node to *newname* (a string).  If a node with the
        same newname already exists and overwrite is true, recursively remove
        it before renaming.

        """

        self._f_move(newname=newname, overwrite=overwrite)

    def _f_move(self,
                newparent=None,
                newname=None,
                overwrite=False,
                createparents=False):
        """Move or rename this node.

        Moves a node into a new parent group, or changes the name of the
        node. newparent can be a Group object (see :ref:`GroupClassDescr`) or a
        pathname in string form. If it is not specified or None, the current
        parent group is chosen as the new parent.  newname must be a string
        with a new name. If it is not specified or None, the current name is
        chosen as the new name. If createparents is true, the needed groups for
        the given new parent group path to exist will be created.

        Moving a node across databases is not allowed, nor it is moving a node
        *into* itself. These result in a NodeError. However, moving a node
        *over* itself is allowed and simply does nothing. Moving over another
        existing node is similarly not allowed, unless the optional overwrite
        argument is true, in which case that node is recursively removed before
        moving.

        Usually, only the first argument will be used, effectively moving the
        node to a new location without changing its name.  Using only the
        second argument is equivalent to renaming the node in place.

        """

        self._g_check_open()
        file_ = self._v_file
        oldparent = self._v_parent
        oldname = self._v_name

        # Set default arguments.
        if newparent is None and newname is None:
            raise NodeError("you should specify at least "
                            "a ``newparent`` or a ``newname`` parameter")
        if newparent is None:
            newparent = oldparent
        if newname is None:
            newname = oldname

        # Get destination location.
        if hasattr(newparent, '_v_file'):  # from node
            newfile = newparent._v_file
            newpath = newparent._v_pathname
        elif hasattr(newparent, 'startswith'):  # from path
            newfile = file_
            newpath = newparent
        else:
            raise TypeError("new parent is not a node nor a path: %r" %
                            (newparent, ))

        # Validity checks on arguments.
        # Is it in the same file?
        if newfile is not file_:
            raise NodeError("nodes can not be moved across databases; "
                            "please make a copy of the node")

        # The movement always fails if the hosting file can not be modified.
        file_._check_writable()

        # Moving over itself?
        oldpath = oldparent._v_pathname
        if newpath == oldpath and newname == oldname:
            # This is equivalent to renaming the node to its current name,
            # and it does not change the referenced object,
            # so it is an allowed no-op.
            return

        # Moving into itself?
        self._g_check_not_contains(newpath)

        # Note that the previous checks allow us to go ahead and create
        # the parent groups if `createparents` is true.  `newparent` is
        # used instead of `newpath` to avoid accepting `Node` objects
        # when `createparents` is true.
        newparent = file_._get_or_create_path(newparent, createparents)
        self._g_check_group(newparent)  # Is it a group?

        # Moving over an existing node?
        self._g_maybe_remove(newparent, newname, overwrite)

        # Move the node.
        oldpathname = self._v_pathname
        self._g_move(newparent, newname)

        # Log the change.
        if file_.is_undo_enabled():
            self._g_log_move(oldpathname)

    def _g_log_move(self, oldpathname):
        self._v_file._log('MOVE', oldpathname, self._v_pathname)

    _g_logMove = previous_api(_g_log_move)

    def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs):
        """Copy this node and return the new one.

        Creates and returns a copy of the node in the given `newparent`,
        with the given `newname`.  If `recursive` copy is stated, all
        descendents are copied as well.  Additional keyword argumens may
        affect the way that the copy is made.  Unknown arguments must be
        ignored.  On recursive copies, all keyword arguments must be
        passed on to the children invocation of this method.

        If `_log` is false, the change is not logged.  This is *only*
        intended to be used by ``_g_copy_as_child()`` as a means of
        optimising sub-tree copies.

        """

        raise NotImplementedError

    def _g_copy_as_child(self, newparent, **kwargs):
        """Copy this node as a child of another group.

        Copies just this node into `newparent`, not recursing children
        nor overwriting nodes nor logging the copy.  This is intended to
        be used when copying whole sub-trees.

        """

        return self._g_copy(newparent,
                            self._v_name,
                            recursive=False,
                            _log=False,
                            **kwargs)

    _g_copyAsChild = previous_api(_g_copy_as_child)

    def _f_copy(self,
                newparent=None,
                newname=None,
                overwrite=False,
                recursive=False,
                createparents=False,
                **kwargs):
        """Copy this node and return the new node.

        Creates and returns a copy of the node, maybe in a different place in
        the hierarchy. newparent can be a Group object (see
        :ref:`GroupClassDescr`) or a pathname in string form. If it is not
        specified or None, the current parent group is chosen as the new
        parent.  newname must be a string with a new name. If it is not
        specified or None, the current name is chosen as the new name. If
        recursive copy is stated, all descendants are copied as well. If
        createparents is true, the needed groups for the given new parent group
        path to exist will be created.

        Copying a node across databases is supported but can not be
        undone. Copying a node over itself is not allowed, nor it is
        recursively copying a node into itself. These result in a
        NodeError. Copying over another existing node is similarly not allowed,
        unless the optional overwrite argument is true, in which case that node
        is recursively removed before copying.

        Additional keyword arguments may be passed to customize the copying
        process. For instance, title and filters may be changed, user
        attributes may be or may not be copied, data may be sub-sampled, stats
        may be collected, etc. See the documentation for the particular node
        type.

        Using only the first argument is equivalent to copying the node to a
        new location without changing its name. Using only the second argument
        is equivalent to making a copy of the node in the same group.

        """

        self._g_check_open()
        srcfile = self._v_file
        srcparent = self._v_parent
        srcname = self._v_name

        dstparent = newparent
        dstname = newname

        # Set default arguments.
        if dstparent is None and dstname is None:
            raise NodeError("you should specify at least "
                            "a ``newparent`` or a ``newname`` parameter")
        if dstparent is None:
            dstparent = srcparent
        if dstname is None:
            dstname = srcname

        # Get destination location.
        if hasattr(dstparent, '_v_file'):  # from node
            dstfile = dstparent._v_file
            dstpath = dstparent._v_pathname
        elif hasattr(dstparent, 'startswith'):  # from path
            dstfile = srcfile
            dstpath = dstparent
        else:
            raise TypeError("new parent is not a node nor a path: %r" %
                            (dstparent, ))

        # Validity checks on arguments.
        if dstfile is srcfile:
            # Copying over itself?
            srcpath = srcparent._v_pathname
            if dstpath == srcpath and dstname == srcname:
                raise NodeError(
                    "source and destination nodes are the same node: ``%s``" %
                    self._v_pathname)

            # Recursively copying into itself?
            if recursive:
                self._g_check_not_contains(dstpath)

        # Note that the previous checks allow us to go ahead and create
        # the parent groups if `createparents` is true.  `dstParent` is
        # used instead of `dstPath` because it may be in other file, and
        # to avoid accepting `Node` objects when `createparents` is
        # true.
        dstparent = srcfile._get_or_create_path(dstparent, createparents)
        self._g_check_group(dstparent)  # Is it a group?

        # Copying to another file with undo enabled?
        if dstfile is not srcfile and srcfile.is_undo_enabled():
            warnings.warn(
                "copying across databases can not be undone "
                "nor redone from this database", UndoRedoWarning)

        # Copying over an existing node?
        self._g_maybe_remove(dstparent, dstname, overwrite)

        # Copy the node.
        # The constructor of the new node takes care of logging.
        return self._g_copy(dstparent, dstname, recursive, **kwargs)

    def _f_isvisible(self):
        """Tell whether this (open) node is visible or not.

        The answer is that of ``isvisiblepath()`` for the path name of
        the node.

        """

        self._g_check_open()
        own_path = self._v_pathname
        return isvisiblepath(own_path)

    _f_isVisible = previous_api(_f_isvisible)

    def _g_check_group(self, node):
        # Node must be defined in order to define a Group.
        # However, we need to know Group here.
        # Using class_name_dict avoids a circular import.
        if not isinstance(node, class_name_dict['Node']):
            raise TypeError("new parent is not a registered node: %s" %
                            node._v_pathname)
        if not isinstance(node, class_name_dict['Group']):
            raise TypeError("new parent node ``%s`` is not a group" %
                            node._v_pathname)

    _g_checkGroup = previous_api(_g_check_group)

    def _g_check_not_contains(self, pathname):
        # The not-a-TARDIS test. ;)
        mypathname = self._v_pathname
        if (mypathname == '/'  # all nodes fall below the root group
                or pathname == mypathname
                or pathname.startswith(mypathname + '/')):
            raise NodeError("can not move or recursively copy node ``%s`` "
                            "into itself" % mypathname)

    _g_checkNotContains = previous_api(_g_check_not_contains)

    def _g_maybe_remove(self, parent, name, overwrite):
        if name in parent:
            if not overwrite:
                raise NodeError("""\
destination group ``%s`` already has a node named ``%s``; \
you may want to use the ``overwrite`` argument""" % (parent._v_pathname, name))
            parent._f_get_child(name)._f_remove(True)

    _g_maybeRemove = previous_api(_g_maybe_remove)

    def _g_check_name(self, name):
        """Check validity of name for this particular kind of node.

        This is invoked once the standard HDF5 and natural naming checks
        have successfully passed.

        """

        if name.startswith('_i_'):
            # This is reserved for table index groups.
            raise ValueError(
                "node name starts with reserved prefix ``_i_``: %s" % name)

    _g_checkName = previous_api(_g_check_name)

    # <attribute handling>
    def _f_getattr(self, name):
        """Get a PyTables attribute from this node.

        If the named attribute does not exist, an AttributeError is
        raised.

        """

        return getattr(self._v_attrs, name)

    _f_getAttr = previous_api(_f_getattr)

    def _f_setattr(self, name, value):
        """Set a PyTables attribute for this node.

        If the node already has a large number of attributes, a
        PerformanceWarning is issued.

        """

        setattr(self._v_attrs, name, value)

    _f_setAttr = previous_api(_f_setattr)

    def _f_delattr(self, name):
        """Delete a PyTables attribute from this node.

        If the named attribute does not exist, an AttributeError is
        raised.

        """

        delattr(self._v_attrs, name)

    _f_delAttr = previous_api(_f_delattr)
Ejemplo n.º 12
0
class Description(object):
    """This class represents descriptions of the structure of tables.

    An instance of this class is automatically bound to Table (see
    :ref:`TableClassDescr`) objects when they are created.  It provides a
    browseable representation of the structure of the table, made of non-nested
    (Col - see :ref:`ColClassDescr`) and nested (Description) columns.

    Column definitions under a description can be accessed as attributes of it
    (*natural naming*). For instance, if table.description is a Description
    instance with a column named col1 under it, the later can be accessed as
    table.description.col1. If col1 is nested and contains a col2 column, this
    can be accessed as table.description.col1.col2. Because of natural naming,
    the names of members start with special prefixes, like in the Group class
    (see :ref:`GroupClassDescr`).


    .. rubric:: Description attributes

    .. attribute:: _v_colobjects

        A dictionary mapping the names of the columns hanging
        directly from the associated table or nested column to their
        respective descriptions (Col - see :ref:`ColClassDescr` or
        Description - see :ref:`DescriptionClassDescr` instances).

        .. versionchanged:: 3.0
           The *_v_colObjects* attobute has been renamed into
           *_v_colobjects*.

    .. attribute:: _v_dflts

        A dictionary mapping the names of non-nested columns
        hanging directly from the associated table or nested column
        to their respective default values.

    .. attribute:: _v_dtype

        The NumPy type which reflects the structure of this
        table or nested column.  You can use this as the
        dtype argument of NumPy array factories.

    .. attribute:: _v_dtypes

        A dictionary mapping the names of non-nested columns
        hanging directly from the associated table or nested column
        to their respective NumPy types.

    .. attribute:: _v_is_nested

        Whether the associated table or nested column contains
        further nested columns or not.

    .. attribute:: _v_itemsize

        The size in bytes of an item in this table or nested column.

    .. attribute:: _v_name

        The name of this description group. The name of the
        root group is '/'.

    .. attribute:: _v_names

        A list of the names of the columns hanging directly
        from the associated table or nested column. The order of the
        names matches the order of their respective columns in the
        containing table.

    .. attribute:: _v_nested_descr

        A nested list of pairs of (name, format) tuples for all the columns
        under this table or nested column. You can use this as the dtype and
        descr arguments of NumPy array factories.

        .. versionchanged:: 3.0
           The *_v_nestedDescr* attribute has been renamed into
           *_v_nested_descr*.

    .. attribute:: _v_nested_formats

        A nested list of the NumPy string formats (and shapes) of all the
        columns under this table or nested column. You can use this as the
        formats argument of NumPy array factories.

        .. versionchanged:: 3.0
           The *_v_nestedFormats* attribute has been renamed into
           *_v_nested_formats*.

    .. attribute:: _v_nestedlvl

        The level of the associated table or nested column in the nested
        datatype.

    .. attribute:: _v_nested_names

        A nested list of the names of all the columns under this table or
        nested column. You can use this as the names argument of NumPy array
        factories.

        .. versionchanged:: 3.0
           The *_v_nestedNames* attribute has been renamed into
           *_v_nested_names*.

    .. attribute:: _v_pathname

        Pathname of the table or nested column.

    .. attribute:: _v_pathnames

        A list of the pathnames of all the columns under this table or nested
        column (in preorder).  If it does not contain nested columns, this is
        exactly the same as the :attr:`Description._v_names` attribute.

    .. attribute:: _v_types

        A dictionary mapping the names of non-nested columns hanging directly
        from the associated table or nested column to their respective PyTables
        types.

    """

    _v_colObjects = previous_api_property('_v_colobjects')
    _v_nestedFormats = previous_api_property('_v_nested_formats')
    _v_nestedNames = previous_api_property('_v_nested_names')
    _v_nestedDesct = previous_api_property('_v_nested_descr')

    def __init__(self, classdict, nestedlvl=-1, validate=True):

        if not classdict:
            raise ValueError("cannot create an empty data type")

        # Do a shallow copy of classdict just in case this is going to
        # be shared by other instances
        newdict = self.__dict__
        newdict["_v_name"] = "/"  # The name for root descriptor
        newdict["_v_names"] = []
        newdict["_v_dtypes"] = {}
        newdict["_v_types"] = {}
        newdict["_v_dflts"] = {}
        newdict["_v_colobjects"] = {}
        newdict["_v_is_nested"] = False
        nestedFormats = []
        nestedDType = []

        if not hasattr(newdict, "_v_nestedlvl"):
            newdict["_v_nestedlvl"] = nestedlvl + 1

        cols_with_pos = []  # colum (position, name) pairs
        cols_no_pos = []  # just column names

        # Check for special variables and convert column descriptions
        for (name, descr) in classdict.iteritems():
            if name.startswith('_v_'):
                if name in newdict:
                    # print("Warning!")
                    # special methods &c: copy to newdict, warn about conflicts
                    warnings.warn("Can't set attr %r in description class %r" %
                                  (name, self))
                else:
                    # print("Special variable!-->", name, classdict[name])
                    newdict[name] = descr
                continue  # This variable is not needed anymore

            columns = None
            if (type(descr) == type(IsDescription)
                    and issubclass(descr, IsDescription)):
                # print("Nested object (type I)-->", name)
                columns = descr().columns
            elif (type(descr.__class__) == type(IsDescription)
                  and issubclass(descr.__class__, IsDescription)):
                # print("Nested object (type II)-->", name)
                columns = descr.columns
            elif isinstance(descr, dict):
                # print("Nested object (type III)-->", name)
                columns = descr
            else:
                # print("Nested object (type IV)-->", name)
                descr = copy.copy(descr)
            # The copies above and below ensure that the structures
            # provided by the user will remain unchanged even if we
            # tamper with the values of ``_v_pos`` here.
            if columns is not None:
                descr = Description(copy.copy(columns), self._v_nestedlvl)
            classdict[name] = descr

            pos = getattr(descr, '_v_pos', None)
            if pos is None:
                cols_no_pos.append(name)
            else:
                cols_with_pos.append((pos, name))

        # Sort field names:
        #
        # 1. Fields with explicit positions, according to their
        #    positions (and their names if coincident).
        # 2. Fields with no position, in alphabetical order.
        cols_with_pos.sort()
        cols_no_pos.sort()
        keys = [name for (pos, name) in cols_with_pos] + cols_no_pos

        pos = 0
        # Get properties for compound types
        for k in keys:
            if validate:
                # Check for key name validity
                check_name_validity(k)
            # Class variables
            object = classdict[k]
            newdict[k] = object  # To allow natural naming
            if not (isinstance(object, Col)
                    or isinstance(object, Description)):
                raise TypeError('Passing an incorrect value to a table column.'
                                ' Expected a Col (or subclass) instance and '
                                'got: "%s". Please make use of the Col(), or '
                                'descendant, constructor to properly '
                                'initialize columns.' % object)
            object._v_pos = pos  # Set the position of this object
            object._v_parent = self  # The parent description
            pos += 1
            newdict['_v_colobjects'][k] = object
            newdict['_v_names'].append(k)
            object.__dict__['_v_name'] = k

            if not isinstance(k, str):
                # numpy only accepts "str" for field names
                if sys.version_info[0] < 3:
                    # Python 2.x: unicode --> str
                    kk = k.encode()  # use the default encoding
                else:
                    # Python 3.x: bytes --> str (unicode)
                    kk = k.decode()
            else:
                kk = k

            if isinstance(object, Col):
                dtype = object.dtype
                newdict['_v_dtypes'][k] = dtype
                newdict['_v_types'][k] = object.type
                newdict['_v_dflts'][k] = object.dflt
                nestedFormats.append(object.recarrtype)
                baserecarrtype = dtype.base.str[1:]
                nestedDType.append((kk, baserecarrtype, dtype.shape))
            else:  # A description
                nestedFormats.append(object._v_nested_formats)
                nestedDType.append((kk, object._v_dtype))

        # Assign the format list to _v_nested_formats
        newdict['_v_nested_formats'] = nestedFormats
        newdict['_v_dtype'] = numpy.dtype(nestedDType)
        # _v_itemsize is derived from the _v_dtype that already computes this
        newdict['_v_itemsize'] = newdict['_v_dtype'].itemsize
        if self._v_nestedlvl == 0:
            # Get recursively nested _v_nested_names and _v_nested_descr attrs
            self._g_set_nested_names_descr()
            # Get pathnames for nested groups
            self._g_set_path_names()
            # Check whether _v_byteorder has been used and issue an error
            if hasattr(self, "_v_byteorder"):
                raise ValueError(
                    "Using a ``_v_byteorder`` in the description is obsolete. "
                    "Use the byteorder parameter in the constructor instead.")

    def _g_set_nested_names_descr(self):
        """Computes the nested names and descriptions for nested datatypes."""

        names = self._v_names
        fmts = self._v_nested_formats
        self._v_nested_names = names[:]  # Important to do a copy!
        self._v_nested_descr = [(names[i], fmts[i]) for i in range(len(names))]
        for i in range(len(names)):
            name = names[i]
            new_object = self._v_colobjects[name]
            if isinstance(new_object, Description):
                new_object._g_set_nested_names_descr()
                # replace the column nested name by a correct tuple
                self._v_nested_names[i] = (name, new_object._v_nested_names)
                self._v_nested_descr[i] = (name, new_object._v_nested_descr)
                # set the _v_is_nested flag
                self._v_is_nested = True

    _g_setNestedNamesDescr = previous_api(_g_set_nested_names_descr)

    def _g_set_path_names(self):
        """Compute the pathnames for arbitrary nested descriptions.

        This description gets an empty ``_v_pathname``.  Every nested
        description and column below it gets a ``_v_pathname`` built by
        joining, with ``/``, the ``_v_pathname`` of its parent
        description and its own ``_v_name``.  Every description
        (this one included) also gets ``_v_pathnames``: the list of the
        paths of all its descendants, relative to that description.

        The traversal is iterative and keeps its pending work in an
        explicit stack.

        """
        def get_cols_in_order(description):
            # Child objects of `description`, in ``_v_names`` order.
            return [
                description._v_colobjects[colname]
                for colname in description._v_names
            ]

        def join_paths(path1, path2):
            # Join two path components with ``/``; an empty (false)
            # `path1` yields `path2` alone.
            if not path1:
                return path2
            return '%s/%s' % (path1, path2)

        # The top of the stack always has a nested description
        # and a list of its child columns
        # (be they nested ``Description`` or non-nested ``Col`` objects).
        # In the end, the list contains only a list of column paths
        # under this one.
        #
        # For instance, given this top of the stack::
        #
        #   (<Description X>, [<Column A>, <Column B>])
        #
        # After computing the rest of the stack, the top is::
        #
        #   (<Description X>, ['a', 'a/m', 'a/n', ... , 'b', ...])

        stack = []

        # We start by pushing the top-level description
        # and its child columns.
        self._v_pathname = ''
        stack.append((self, get_cols_in_order(self)))

        while stack:
            desc, cols = stack.pop()
            # NOTE(review): an empty ``cols`` list would raise IndexError
            # here -- confirm that a description always has at least
            # one column when this method runs.
            head = cols[0]

            # What's the first child in the list?
            if isinstance(head, Description):
                # A nested description.  We remove it from the list and
                # push it with its child columns.  This will be the next
                # handled description.
                head._v_pathname = join_paths(desc._v_pathname, head._v_name)
                # ``cols[1:]`` is a new list; the nested description will
                # later extend that very list with its own paths.
                stack.append((desc, cols[1:]))  # alter the top
                stack.append((head, get_cols_in_order(head)))  # new top
            elif isinstance(head, Col):
                # A non-nested column.  We simply remove it from the
                # list and append its name to it.
                head._v_pathname = join_paths(desc._v_pathname, head._v_name)
                cols.append(head._v_name)  # alter the top
                stack.append((desc, cols[1:]))  # alter the top
            else:
                # Since paths and names are appended *to the end* of
                # children lists, a string signals that no more children
                # remain to be processed, so we are done with the
                # description at the top of the stack.
                # (``basestring`` is the Python 2 common string base
                # type.  NOTE(review): presumably translated for
                # Python 3 elsewhere -- confirm.)
                assert isinstance(head, basestring)
                # Assign the computed set of descendent column paths.
                desc._v_pathnames = cols
                if len(stack) > 0:
                    # Compute the paths with respect to the parent node
                    # (including the path of the current description)
                    # and append them to its list.
                    descName = desc._v_name
                    colPaths = [join_paths(descName, path) for path in cols]
                    colPaths.insert(0, descName)
                    # The parent's pending list is extended in place, so
                    # the stack entry itself does not need replacing.
                    parentCols = stack[-1][1]
                    parentCols.extend(colPaths)
                # (Nothing is pushed, we are done with this description.)

    _g_setPathNames = previous_api(_g_set_path_names)

    def _f_walk(self, type='All'):
        """Iterate over nested columns.

        If type is 'All' (the default), all column description objects (Col and
        Description instances) are yielded in top-to-bottom order (preorder).

        If type is 'Col' or 'Description', only column descriptions of that
        type are yielded.

        """

        if type not in ["All", "Col", "Description"]:
            raise ValueError("""\
type can only take the parameters 'All', 'Col' or 'Description'.""")

        stack = [self]
        while stack:
            object = stack.pop(0)  # pop at the front so as to ensure the order
            if type in ["All", "Description"]:
                yield object  # yield description
            names = object._v_names
            for i in range(len(names)):
                new_object = object._v_colobjects[names[i]]
                if isinstance(new_object, Description):
                    stack.append(new_object)
                else:
                    if type in ["All", "Col"]:
                        yield new_object  # yield column

    def __repr__(self):
        """Gives a detailed Description column representation."""

        rep = [
            '%s\"%s\": %r' %
            ("  " * self._v_nestedlvl, k, self._v_colobjects[k])
            for k in self._v_names
        ]
        return '{\n  %s}' % (',\n  '.join(rep))

    def __str__(self):
        """Gives a brief Description representation."""

        return 'Description(%s)' % self._v_nested_descr
Ejemplo n.º 13
0
class Group(hdf5extension.Group, Node):
    """Basic PyTables grouping structure.

    Instances of this class are grouping structures containing *child*
    instances of zero or more groups or leaves, together with
    supporting metadata. Each group has exactly one *parent* group.

    Working with groups and leaves is similar in many ways to working
    with directories and files, respectively, in a Unix filesystem.
    As with Unix directories and files, objects in the object tree are
    often described by giving their full (or absolute) path names.
    This full path can be specified either as a string (like in
    '/group1/group2') or as a complete object path written in *natural
    naming* schema (like in file.root.group1.group2).

    A collateral effect of the *natural naming* schema is that the
    names of members in the Group class and its instances must be
    carefully chosen to avoid colliding with existing children node
    names.  For this reason and to avoid polluting the children
    namespace all members in a Group start with some reserved prefix,
    like _f_ (for public methods), _g_ (for private ones), _v_ (for
    instance variables) or _c_ (for class variables). Any attempt to
    create a new child node whose name starts with one of these
    prefixes will raise a ValueError exception.

    Another effect of natural naming is that children named after
    Python keywords or having names not valid as Python identifiers
    (e.g.  class, $a or 44) can not be accessed using the node.child
    syntax. You will be forced to use node._f_get_child(child) to
    access them (which is recommended for programmatic accesses).

    You will also need to use _f_get_child() to access an existing
    child node if you set a Python attribute in the Group with the
    same name as that node (you will get a NaturalNameWarning when
    doing this).

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*

    name : str
        The name of this node in its parent group.
    title
        The title for this group
    new
        If this group is new or has to be read from disk
    filters : Filters
        A Filters instance


    Notes
    -----
    The following documentation includes methods that are automatically
    called when a Group instance is accessed in a special way.

    For instance, this class defines the __setattr__, __getattr__, and
    __delattr__ methods, and they set, get and delete *ordinary Python
    attributes* as normally intended. In addition to that, __getattr__
    allows getting *child nodes* by their name for the sake of easy
    interaction on the command line, as long as there is no Python
    attribute with the same name. Groups also allow the interactive
    completion (when using readline) of the names of child nodes.
    For instance::

        # get a Python attribute
        nchild = group._v_nchildren

        # Add a Table child called 'table' under 'group'.
        h5file.create_table(group, 'table', myDescription)
        table = group.table          # get the table child instance
        group.table = 'foo'          # set a Python attribute

        # (PyTables warns you here about using the name of a child node.)
        foo = group.table            # get a Python attribute
        del group.table              # delete a Python attribute
        table = group.table          # get the table child instance again


    .. rubric:: Group attributes

    The following instance variables are provided in addition to those
    in Node (see :ref:`NodeClassDescr`):

    .. attribute:: _v_children

        Dictionary with all nodes hanging from this group.

    .. attribute:: _v_groups

        Dictionary with all groups hanging from this group.

    .. attribute:: _v_hidden

        Dictionary with all hidden nodes hanging from this group.

    .. attribute:: _v_leaves

        Dictionary with all leaves hanging from this group.

    .. attribute:: _v_links

        Dictionary with all links hanging from this group.

    .. attribute:: _v_unknown

        Dictionary with all unknown nodes hanging from this group.

    """

    # Class identifier.
    _c_classid = 'GROUP'

    _c_classId = previous_api_property('_c_classid')

    # Children containers that should be loaded only in a lazy way.
    # These are documented in the ``Group._g_add_children_names`` method.
    _c_lazy_children_attrs = ('__members__', '_v_children', '_v_groups',
                              '_v_leaves', '_v_links', '_v_unknown',
                              '_v_hidden')

    # `_v_nchildren` is a direct read-only shorthand
    # for the number of *visible* children in a group.
    def _g_getnchildren(self):
        """Return the number of entries held in ``_v_children``."""
        visible = self._v_children
        return len(visible)

    _v_nchildren = property(
        fget=_g_getnchildren,
        doc="The number of children hanging from this group.")

    # `_v_filters` is a direct read-write shorthand for the ``FILTERS``
    # attribute with the default `Filters` instance as a default value.
    def _g_getfilters(self):
        """Return the ``FILTERS`` attribute, or a fresh ``Filters()``."""
        stored = getattr(self._v_attrs, 'FILTERS', None)
        return Filters() if stored is None else stored

    def _g_setfilters(self, value):
        """Store `value` as the ``FILTERS`` attribute.

        Raises ``TypeError`` when `value` is not a ``Filters`` instance.
        """
        if isinstance(value, Filters):
            self._v_attrs.FILTERS = value
            return
        raise TypeError("value is not an instance of `Filters`: %r" %
                        (value, ))

    def _g_delfilters(self):
        """Delete the ``FILTERS`` attribute from the attribute set."""
        del self._v_attrs.FILTERS

    _v_filters = property(
        fget=_g_getfilters,
        fset=_g_setfilters,
        fdel=_g_delfilters,
        doc="""Default filter properties for child nodes.

        You can (and are encouraged to) use this property to get, set and
        delete the FILTERS HDF5 attribute of the group, which stores a Filters
        instance (see :ref:`FiltersClassDescr`). When the group has no such
        attribute, a default Filters instance is used.
        """)

    _v_maxGroupWidth = previous_api_property('_v_max_group_width')

    def __init__(self, parentnode, name, title="", new=False, filters=None,
                 _log=True):
        """Record the group-specific state, then initialise the node part.

        The arguments are only stored here (``title`` as ``_v_new_title``,
        ``new`` as ``_v_new``, ``filters`` as ``_v_new_filters``); the
        maximum group width is read from the parameters of the file that
        `parentnode` belongs to.
        """

        # NOTE: a root group constructor that does not go through this
        # one has to assign the very same attributes by itself.

        # The object version of this group.
        self._v_version = obversion

        # Is this the first time the node has been created?
        self._v_new = new

        # New title for this node.
        self._v_new_title = title

        # New default filter properties for child nodes.
        self._v_new_filters = filters

        # Maximum number of children on each group before warning the
        # user.  (Version 3.0 renamed ``_v_maxGroupWidth`` into
        # ``_v_max_group_width``.)
        file_params = parentnode._v_file.params
        self._v_max_group_width = file_params['MAX_GROUP_WIDTH']

        # Only now is this object set up as a node.
        super(Group, self).__init__(parentnode, name, _log)

    def _g_post_init_hook(self):
        """Write the system attributes of a new group or read its version.

        For an existing group only ``_v_version`` is refreshed from the
        ``VERSION`` system attribute (``"0.0 (unknown)"`` when absent).
        For a new group, and only when the ``PYTABLES_SYS_ATTRS`` file
        parameter is true, the ``TITLE``, ``CLASS``, ``VERSION`` and,
        when available, ``FILTERS`` attributes are written.
        """

        if not self._v_new:
            # Existing group: take the version from the file, if stored.
            # Nothing else is read, since the most important attributes
            # are defined as properties.
            attrs = self._v_attrs
            if 'VERSION' in attrs._v_attrnamessys:
                self._v_version = attrs.VERSION
            else:
                self._v_version = "0.0 (unknown)"
            return

        if not self._v_file.params['PYTABLES_SYS_ATTRS']:
            return

        # New group: save its title, class and version attributes.
        write_attr = self._v_attrs._g__setattr
        write_attr('TITLE', self._v_new_title)
        write_attr('CLASS', self._c_classid)
        write_attr('VERSION', self._v_version)

        # Default filters: the ones given to the constructor or, failing
        # that, the ones stored in the parent group (if any).
        default_filters = self._v_new_filters
        if default_filters is None:
            parent_attrs = self._v_parent._v_attrs
            default_filters = getattr(parent_attrs, 'FILTERS', None)
        if default_filters is not None:
            write_attr('FILTERS', default_filters)

    _g_postInitHook = previous_api(_g_post_init_hook)

    def __del__(self):
        """Refresh the children containers' back-references, then chain up.

        When the group is open, still present in the node manager
        registry and has its children containers loaded, each container
        gets a fresh weak reference to this group in ``containerref``.
        """

        containers_loaded = (
            self._v_isopen
            and self._v_pathname in self._v_file._node_manager.registry
            and '_v_children' in self.__dict__)

        if containers_loaded:
            # The group is going to be killed.  Python cancelled the
            # weak references just before calling this method, so they
            # are rebuilt in order to stay usable if the object is
            # revived later.
            selfref = weakref.ref(self)
            for attrname in ('_v_children', '_v_groups', '_v_leaves',
                             '_v_links', '_v_unknown', '_v_hidden'):
                getattr(self, attrname).containerref = selfref

        super(Group, self).__del__()

    def _g_get_child_group_class(self, childname):
        """Return the class to use for a group child not loaded yet.

        `childname` must be the name of a *group* child.  Its ``CLASS``
        attribute is looked up in ``class_id_dict``; ``Group`` is
        returned when the identifier is not registered there.

        """

        class_id = self._g_get_gchild_attr(childname, 'CLASS')
        if not (class_id is None or isinstance(class_id, str)):
            # The identifier did not come as ``str``: decode it.
            class_id = class_id.decode('utf-8')

        # Fall back to the plain group class for unregistered identifiers.
        return class_id_dict[class_id] if class_id in class_id_dict else Group

    _g_getChildGroupClass = previous_api(_g_get_child_group_class)

    def _g_get_child_leaf_class(self, childname, warn=True):
        """Return the class to use for a leaf child not loaded yet.

        `childname` must be the name of a *leaf* child.  The ``CLASS``
        attribute is consulted first (only when the ``PYTABLES_SYS_ATTRS``
        file parameter is true); if it is not registered in
        ``class_id_dict`` the class is guessed with
        ``utilsextension.which_class``.  When the guess is
        ``'UNSUPPORTED'``, ``UnImplemented`` is returned, and a warning
        is issued if `warn` is true.

        """

        class_id = None
        if self._v_file.params['PYTABLES_SYS_ATTRS']:
            class_id = self._g_get_lchild_attr(childname, 'CLASS')
            if not (class_id is None or isinstance(class_id, str)):
                class_id = class_id.decode('utf-8')

        if class_id in class_id_dict:
            return class_id_dict[class_id]  # registered leaf class

        # Unknown or no ``CLASS`` attribute, try a guess.
        guessed_id = utilsextension.which_class(self._v_objectid, childname)
        if guessed_id != 'UNSUPPORTED':
            assert guessed_id in class_id_dict
            return class_id_dict[guessed_id]  # guessed leaf class

        if warn:
            if class_id is None:
                message = (
                    "leaf ``%s`` is of an unsupported type; "
                    "it will become an ``UnImplemented`` node" %
                    self._g_join(childname))
            else:
                message = (
                    "leaf ``%s`` has an unknown class ID ``%s``; "
                    "it will become an ``UnImplemented`` node" %
                    (self._g_join(childname), class_id))
            warnings.warn(message)
        return UnImplemented

    _g_getChildLeafClass = previous_api(_g_get_child_leaf_class)

    def _g_add_children_names(self):
        """Create the lazy children containers and fill in their names.

        Names listed by ``_g_list_group`` are split by kind (groups,
        leaves, links, unknown) and by visibility: visible names go to
        ``__members__``, ``_v_children`` and the container of their
        kind, whereas hidden names only go to ``_v_hidden``.
        """

        instance_dict = self.__dict__

        # Names of visible children nodes for readline-style completion.
        members = instance_dict['__members__'] = []
        # Dictionary with all (visible) nodes hanging from this group.
        children = instance_dict['_v_children'] = _ChildrenDict(self)
        # Dictionary with all groups hanging from this group.
        groups = instance_dict['_v_groups'] = _ChildrenDict(self)
        # Dictionary with all leaves hanging from this group.
        leaves = instance_dict['_v_leaves'] = _ChildrenDict(self)
        # Dictionary with all links hanging from this group.
        links = instance_dict['_v_links'] = _ChildrenDict(self)
        # Dictionary with all unknown nodes hanging from this group.
        unknown = instance_dict['_v_unknown'] = _ChildrenDict(self)
        # Dictionary with all hidden nodes hanging from this group.
        hidden = instance_dict['_v_hidden'] = _ChildrenDict(self)

        # Get the names of *all* children, already split by kind.
        (group_names, leaf_names, link_names, unknown_names) = \
            self._g_list_group(self._v_parent)

        names_by_kind = (
            (group_names, groups),
            (leaf_names, leaves),
            (link_names, links),
            (unknown_names, unknown),
        )
        for (kind_names, kind_dict) in names_by_kind:
            for childname in kind_names:
                # Only names are recorded: the stored values are
                # entirely irrelevant.
                if not isvisiblename(childname):
                    hidden[childname] = None
                    continue
                members.insert(0, childname)
                children[childname] = None
                kind_dict[childname] = None

    _g_addChildrenNames = previous_api(_g_add_children_names)

    def _g_check_has_child(self, name):
        """Return the node type of child `name`.

        Raises ``NoSuchNodeError`` when ``_g_get_objinfo`` reports
        ``"NoSuchNode"`` for that name.
        """

        # Ask for the kind of the object stored under that name.
        node_type = self._g_get_objinfo(name)
        if node_type != "NoSuchNode":
            return node_type
        raise NoSuchNodeError(
            "group ``%s`` does not have a child named ``%s``" %
            (self._v_pathname, name))

    _g_checkHasChild = previous_api(_g_check_has_child)

    def __iter__(self):
        """Iterate over the nodes returned by ``_f_iter_nodes()``.

        Only direct children are involved: the iteration does *not*
        descend into child groups.

        Examples
        --------

        ::

            # List the nodes hanging directly from '/detector'
            print("Nodes in '/detector' group:")
            for node in h5file.root.detector:
                print(node)

        """

        direct_children = self._f_iter_nodes()
        return direct_children

    def __contains__(self, name):
        """Tell whether this group has a child called `name`.

        The group must be open.  The answer is true whenever
        ``_g_check_has_child`` accepts `name` (a string), and false when
        it raises ``NoSuchNodeError``.

        """

        self._g_check_open()
        try:
            self._g_check_has_child(name)
        except NoSuchNodeError:
            found = False
        else:
            found = True
        return found

    def _f_walknodes(self, classname=None):
        """Iterate over descendant nodes.

        Groups are visited in the order produced by
        :meth:`Group._f_walk_groups`.  If *classname* is ``'Group'`` the
        groups themselves are yielded; otherwise, for every visited
        group, the nodes given by ``_f_iter_nodes(classname)`` are
        yielded.  An empty string is accepted as a synonym of ``None``.

        If you don't want a recursive behavior, use
        :meth:`Group._f_iter_nodes` instead.

        Examples
        --------

        ::

            # Recursively print all the arrays hanging from '/'
            print("Arrays in the object tree '/':")
            for array in h5file.root._f_walknodes('Array'):
                print(array)

        """

        self._g_check_open()

        # For compatibility with old default arguments.
        if classname == '':
            classname = None

        groups_only = (classname == "Group")
        for group in self._f_walk_groups():
            if groups_only:
                yield group
                continue
            for node in group._f_iter_nodes(classname):
                yield node

    _f_walkNodes = previous_api(_f_walknodes)

    def _g_join(self, name):
        """Return the path of child `name` under this group.

        The name ``"/"`` stands for the group itself, so the pathname of
        the group is returned unchanged in that case.
        """

        if name != "/":
            return join_path(self._v_pathname, name)
        # This case can happen when doing copies.
        return self._v_pathname

    def _g_width_warning(self):
        """Warn, with a :exc:`PerformanceWarning`, about too many children.

        The message mentions the pathname of the group and its
        ``_v_max_group_width`` value.
        """

        template = (
            "group ``%s`` is exceeding the recommended maximum "
            "number of children (%d); "
            "be ready to see PyTables asking for *lots* of memory "
            "and possibly slow I/O.")
        message = template % (self._v_pathname, self._v_max_group_width)
        warnings.warn(message, PerformanceWarning)

    _g_widthWarning = previous_api(_g_width_warning)

    def _g_refnode(self, childnode, childname, validate=True):
        """Register `childnode` in this group under `childname`.

        Unless `validate` is false (which may be useful for adding
        already existing nodes to the tree), the name is checked first.
        A ``NodeError`` is raised if a child with that name already
        exists (links are exempt from this check).  A
        ``NaturalNameWarning`` is issued if the name collides with an
        instance attribute, and the width warning is triggered when the
        group already holds ``_v_max_group_width`` or more children.
        Finally the name is recorded in the proper containers.

        """

        if validate:
            check_name_validity(childname)
            childnode._g_check_name(childname)

        # A clash can be caused by the user (via node construction or
        # renaming/movement).  Links are not checked because they are
        # copied and referenced using ``File.get_node``, so they already
        # exist in `self`.
        is_link = isinstance(childnode, Link)
        if not is_link and childname in self:
            raise NodeError(
                "group ``%s`` already has a child node named ``%s``" %
                (self._v_pathname, childname))

        # An instance attribute with that name hides the child node.
        if childname in self.__dict__:
            message = (
                "group ``%s`` already has an attribute named ``%s``; "
                "you will not be able to use natural naming "
                "to access the child node" % (self._v_pathname, childname))
            warnings.warn(message, NaturalNameWarning)

        # Check group width limits.
        current_width = len(self._v_children) + len(self._v_hidden)
        if current_width >= self._v_max_group_width:
            self._g_width_warning()

        # Record the name (the stored values are entirely irrelevant).
        if not isvisiblename(childname):
            self._v_hidden[childname] = None
            return

        self.__members__.insert(0, childname)  # enable completion
        self._v_children[childname] = None
        # The first matching kind wins, in this very order.
        kinds = ((Unknown, '_v_unknown'), (Link, '_v_links'),
                 (Leaf, '_v_leaves'), (Group, '_v_groups'))
        for (nodeclass, attrname) in kinds:
            if isinstance(childnode, nodeclass):
                getattr(self, attrname)[childname] = None
                break

    _g_refNode = previous_api(_g_refnode)

    def _g_unrefnode(self, childname):
        """Forget the child called `childname`.

        The name is removed from every children container that holds
        it.  Nothing is updated when the containers have not been
        loaded yet.

        """

        # This can *not* be triggered because of the user.
        assert childname in self, (
            "group ``%s`` does not have a child node named ``%s``"
            % (self._v_pathname, childname))

        if '_v_children' not in self.__dict__:
            return  # members information was never loaded

        if childname not in self._v_children:
            # Hidden node.
            del self._v_hidden[childname]
            return

        # Visible node: first disable completion for it...
        completion_names = self.__members__
        del completion_names[completion_names.index(childname)]

        # ...then drop it from the containers.
        del self._v_children[childname]
        by_kind = (self._v_unknown, self._v_links,
                   self._v_leaves, self._v_groups)
        for container in by_kind:
            container.pop(childname, None)

    _g_unrefNode = previous_api(_g_unrefnode)

    def _g_move(self, newparent, newname):
        """Move this group and have the file update node locations.

        After the move done by the parent class, the file is asked to
        update node locations from the old pathname to the new one.
        """

        previous_path = self._v_pathname
        super(Group, self)._g_move(newparent, newname)

        # This node has already been relocated by the call above, so
        # only the location information in children needs updating.
        self._v_file._update_node_locations(previous_path, self._v_pathname)

    def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs):
        """Create a copy of this group as `newname` under `newparent`.

        Recognised keyword arguments: ``title`` and ``filters`` (both
        default to the ones of this group), ``stats`` (a mapping whose
        ``'groups'`` counter is incremented) and ``copyuserattrs``
        (true by default).  When `recursive` is true the children are
        copied too, receiving every keyword argument but ``title``.
        The new group is returned.
        """

        # Compute default arguments.
        title = kwargs.get('title', self._v_title)
        filters = kwargs.get('filters', None)
        stats = kwargs.get('stats', None)

        # Explicit ``None`` values are accepted for backwards
        # compatibility and mean "same as the source group".
        if title is None:
            title = self._v_title
        if filters is None:
            # Only filters which were inherited or explicitly set.
            filters = getattr(self._v_attrs, 'FILTERS', None)

        group_copy = Group(newparent, newname, title,
                           new=True, filters=filters, _log=_log)

        # Copy user attributes if needed.
        if kwargs.get('copyuserattrs', True):
            self._v_attrs._g_copy(group_copy._v_attrs, copyclass=True)

        # Update statistics if needed.
        if stats is not None:
            stats['groups'] += 1

        if recursive:
            # The title should *not* be passed to children copy ops.
            child_kwargs = dict(
                (key, value) for (key, value) in kwargs.items()
                if key != 'title')
            self._g_copy_children(group_copy, **child_kwargs)

        return group_copy

    def _g_copy_children(self, newparent, **kwargs):
        """Copy child nodes.

        Copies all nodes descending from this one into the specified
        `newparent`.  If the new parent has a child node with the same
        name as one of the nodes in this group, the copy fails with a
        `NodeError`, maybe resulting in a partial copy.  Nothing is
        logged.

        All keyword arguments are forwarded to the copy operation of every
        child.  Two of them are also used here:

        use_hardlinks
            If true, a child whose object info reports a reference count
            greater than one, and whose address has already been copied,
            is re-created as a hard link to the first copy instead of
            being copied again.  Copied addresses are tracked in the
            ``address_map`` keyword argument, which is created if missing.
        stats
            If given, its ``'hardlinks'`` entry is incremented (starting
            from 0 if absent) for every hard link created.

        """

        use_hardlinks = kwargs.get('use_hardlinks', False)
        # ``stats`` is only read, never popped: it has to stay in `kwargs`
        # so that the copies of the remaining children keep updating it.
        stats = kwargs.get('stats', None)
        if use_hardlinks:
            address_map = kwargs.setdefault('address_map', {})

        # Non-recursive traversal: an explicit stack of pending groups.
        parentstack = [(self, newparent)]  # [(source, destination), ...]
        while parentstack:
            (srcparent, dstparent) = parentstack.pop()

            if use_hardlinks:
                for srcchild in srcparent._v_children.itervalues():
                    addr, rc = srcchild._get_obj_info()
                    if rc > 1 and addr in address_map:
                        # Already copied once: link to the first copy.
                        where, name = address_map[addr][0]
                        # Node paths always use '/' as separator, so they
                        # must not be built with the platform-dependent
                        # ``os.path.join()``.
                        localsrc = join_path(where, name)
                        dstparent._v_file.create_hard_link(
                            dstparent, srcchild.name, localsrc)
                        address_map[addr].append(
                            (dstparent._v_pathname, srcchild.name))

                        # Update statistics if needed.
                        if stats is not None:
                            stats['hardlinks'] = stats.get('hardlinks', 0) + 1
                    else:
                        dstchild = srcchild._g_copy_as_child(
                            dstparent, **kwargs)
                        if isinstance(srcchild, Group):
                            parentstack.append((srcchild, dstchild))

                        if rc > 1:
                            # First copy of a multiply-referenced object:
                            # remember where it went for later links.
                            address_map[addr] = [(dstparent._v_pathname,
                                                  srcchild.name)]
            else:
                for srcchild in srcparent._v_children.itervalues():
                    dstchild = srcchild._g_copy_as_child(dstparent, **kwargs)
                    if isinstance(srcchild, Group):
                        parentstack.append((srcchild, dstchild))

    _g_copyChildren = previous_api(_g_copy_children)

    def _f_get_child(self, childname):
        """Return the child of this group named `childname`.

        The child is returned whether it is visible or not.  If there is
        no such child, a NoSuchNodeError is raised.

        Prefer this method to getattr() for programmatic access, i.e. when
        `childname` is not known beforehand or when it is not a valid
        Python identifier.

        """

        # Both checks happen before the node is fetched from the file.
        self._g_check_open()
        self._g_check_has_child(childname)

        return self._v_file._get_node(join_path(self._v_pathname, childname))

    _f_getChild = previous_api(_f_get_child)

    def _f_list_nodes(self, classname=None):
        """Return a *list* with children nodes.

        This is a list-returning version of :meth:`Group._f_iter_nodes()`.

        """

        return list(self._f_iter_nodes(classname))

    _f_listNodes = previous_api(_f_list_nodes)

    def _f_iter_nodes(self, classname=None):
        """Iterate over children nodes.

        Child nodes are yielded alphanumerically sorted by node name.  If the
        name of a class derived from Node (see :ref:`NodeClassDescr`) is
        supplied in the classname parameter, only instances of that class (or
        subclasses of it) will be returned.

        This is an iterator version of :meth:`Group._f_list_nodes`.

        """

        self._g_check_open()

        if not classname:
            # Returns all the children alphanumerically sorted
            names = sorted(self._v_children.iterkeys())
            for name in names:
                yield self._v_children[name]
        elif classname == 'Group':
            # Returns all the groups alphanumerically sorted
            names = sorted(self._v_groups.iterkeys())
            for name in names:
                yield self._v_groups[name]
        elif classname == 'Leaf':
            # Returns all the leaves alphanumerically sorted
            names = sorted(self._v_leaves.iterkeys())
            for name in names:
                yield self._v_leaves[name]
        elif classname == 'Link':
            # Returns all the links alphanumerically sorted
            names = sorted(self._v_links.iterkeys())
            for name in names:
                yield self._v_links[name]
        elif classname == 'IndexArray':
            raise TypeError("listing ``IndexArray`` nodes is not allowed")
        else:
            class_ = get_class_by_name(classname)

            children = self._v_children
            childnames = sorted(children.iterkeys())

            for childname in childnames:
                childnode = children[childname]
                if isinstance(childnode, class_):
                    yield childnode

    _f_iterNodes = previous_api(_f_iter_nodes)

    def _f_walk_groups(self):
        """Recursively iterate over descendent groups (not leaves).

        This method starts by yielding *self*, and then it goes on to
        recursively iterate over all child groups in alphanumerical order, top
        to bottom (preorder), following the same procedure.

        """

        self._g_check_open()

        stack = [self]
        yield self
        # Iterate over the descendants
        while stack:
            objgroup = stack.pop()
            groupnames = sorted(objgroup._v_groups.iterkeys())
            # Sort the groups before delivering. This uses the groups names
            # for groups in tree (in order to sort() can classify them).
            for groupname in groupnames:
                stack.append(objgroup._v_groups[groupname])
                yield objgroup._v_groups[groupname]

    _f_walkGroups = previous_api(_f_walk_groups)

    def __delattr__(self, name):
        """Delete the Python attribute called name.

        Only an *ordinary Python attribute* is deleted from the object.
        Children nodes of this group are *not* removed; use
        :meth:`File.remove_node` or :meth:`Node._f_remove` for that.
        PyTables node attributes are *not* deleted either; use
        :meth:`File.del_node_attr`, :meth:`Node._f_delattr` or
        :attr:`Node._v_attrs` for that.

        If an attribute and a child node share the same name, the child
        node becomes accessible again via natural naming.

        """

        try:
            # Plain attribute deletion, done by the parent class.
            super(Group, self).__delattr__(name)
        except AttributeError as error:
            # Raise again the same kind of error with a hint for users
            # who were actually trying to remove a node.
            message = str(error) + (
                " (use ``node._f_remove()`` if you want to remove a node)")
            raise error.__class__(message)

    def __getattr__(self, name):
        """Get a Python attribute or child node called name.

        If the object has a Python attribute called name, its value is
        returned. Else, if the node has a child node called name, it is
        returned.  Else, an AttributeError is raised.

        """

        # That is true since a `NoSuchNodeError` is an `AttributeError`.
        mydict = self.__dict__
        if name in mydict:
            return mydict[name]
        elif name in self._c_lazy_children_attrs:
            self._g_add_children_names()
            return mydict[name]
        return self._f_get_child(name)

    def __setattr__(self, name, value):
        """Set the Python attribute called name to the given value.

        Only an *ordinary Python attribute* is stored in the object.  No
        child node is created under this group; use the File.create*()
        methods for that (see the File class in :ref:`FileClassDescr`).
        No PyTables node attribute is stored either; use
        :meth:`File.set_node_attr`, :meth:`Node._f_setattr` or
        :attr:`Node._v_attrs` for that.

        If a child node with the same name already exists, a
        NaturalNameWarning is issued and the child node stops being
        accessible via natural naming and getattr().  It can still be
        reached via :meth:`File.get_node`, :meth:`Group._f_get_child` and
        the children dictionaries of the group (if visible).

        """

        # Warn when the name clashes with that of a child node.
        #
        # ..note::
        #
        #   ``if name in self:`` can not be used for this, since it needs
        #   ``_v_children`` and ``_v_hidden`` to be set already when the
        #   very first attributes are assigned.  Besides, only clashes
        #   with names used in natural naming (those in ``__members__``)
        #   matter here.
        #
        # ..note::
        #
        #   Testing for ``'__members__'`` in the instance dictionary first
        #   lets attributes be assigned before `Group.__init__()` runs,
        #   while ``__members__`` is still unassigned.  Subclasses may
        #   thus set up some attributes and then call the constructor of
        #   the superclass.  Without that test, Python enters an endless
        #   loop on exit!

        attributes = self.__dict__
        hides_child = ('__members__' in attributes
                       and name in self.__members__)
        if hides_child:
            message = ("group ``%s`` already has a child node named ``%s``; "
                       "you will not be able to use natural naming "
                       "to access the child node" % (self._v_pathname, name))
            warnings.warn(message, NaturalNameWarning)

        super(Group, self).__setattr__(name, value)

    def _f_flush(self):
        """Flush this Group."""

        self._g_check_open()
        self._g_flush_group()

    def _g_close_descendents(self):
        """Close all the *loaded* descendent nodes of this group."""

        node_manager = self._v_file._node_manager
        node_manager.close_subtree(self._v_pathname)

    _g_closeDescendents = previous_api(_g_close_descendents)

    def _g_close(self):
        """Close this (open) group.

        The HDF5 group is only closed when this node is still open; the
        node-level close of the parent class is called in any case.

        """

        still_open = self._v_isopen
        if still_open:
            # hdf5extension operation: close the HDF5 group.
            self._g_close_group()

        # Finally, close this object as a plain node.
        super(Group, self)._f_close()

    def _f_close(self):
        """Close this group and all its descendents.

        This method has the behavior described in :meth:`Node._f_close`.
        It should be noted that this operation closes all the nodes
        descending from this group.

        You should not need to close nodes manually because they are
        automatically opened/closed when they are loaded/evicted from
        the integrated LRU cache.

        """

        # If the group is already closed, return immediately
        if not self._v_isopen:
            return

        # First, close all the descendents of this group, unless a) the
        # group is being deleted (evicted from LRU cache) or b) the node
        # is being closed during an aborted creation, in which cases
        # this is not an explicit close issued by the user.
        if not (self._v__deleting or self._v_objectid is None):
            self._g_close_descendents()

        # When all the descendents have been closed, close this group.
        # This is done at the end because some nodes may still need to
        # be loaded during the closing process; thus this node must be
        # open until the very end.
        self._g_close()

    def _g_remove(self, recursive=False, force=False):
        """Remove the Group, along with its children if requested.

        A group with child nodes is only removed when `recursive` or
        `force` is true; otherwise a `NodeError` is raised.  Both visible
        and hidden nodes are handled correctly.

        """

        if self._v_nchildren > 0:
            if not recursive and not force:
                raise NodeError("group ``%s`` has child nodes; "
                                "please set `recursive` or `force` to true "
                                "to remove it" % (self._v_pathname, ))

            # Descendents are closed before anything is removed, so that
            # nobody can use a node that no longer exists.
            self._g_close_descendents()

        # Let the parent class take the node itself out of the hierarchy.
        super(Group, self)._g_remove(recursive, force)

    def _f_copy(self,
                newparent=None,
                newname=None,
                overwrite=False,
                recursive=False,
                createparents=False,
                **kwargs):
        """Copy this node and return the new one.

        This method behaves as described in :meth:`Node._f_copy`.  The
        following keyword arguments are recognized in addition:

        Parameters
        ----------
        title
            The new title for the destination. The original title is used
            if it is omitted or None. In recursive copies, this only
            applies to the topmost node.
        filters : Filters
            When specified, it overrides the original filter properties
            of the source node. It must be an instance of the Filters
            class (see :ref:`FiltersClassDescr`). By default, the filter
            properties are copied from the source node.
        copyuserattrs
            Set this parameter to False to prevent the user attributes
            from being copied. They are copied by default.
        stats
            A dictionary used to collect statistics on the copy process.
            It should have the keys 'groups', 'leaves', 'links' and
            'bytes' with numeric values, which will be incremented to
            reflect the number of groups, leaves, links and bytes copied
            during the operation.

        """

        copy_node = super(Group, self)._f_copy
        return copy_node(newparent, newname, overwrite,
                         recursive, createparents, **kwargs)

    def _f_copy_children(self,
                         dstgroup,
                         overwrite=False,
                         recursive=False,
                         createparents=False,
                         **kwargs):
        """Copy the children of this group into another group.

        Children hanging directly from this group are copied into dstgroup,
        which can be a Group (see :ref:`GroupClassDescr`) object or its
        pathname in string form. If createparents is true, the needed groups
        for the given destination group path to exist will be created.

        The operation will fail with a NodeError if there is a child node
        in the destination group with the same name as one of the copied
        children from this one, unless overwrite is true; in this case,
        the former child node is recursively removed before copying the
        later.

        By default, nodes descending from children groups of this node
        are not copied. If the recursive argument is true, all descendant
        nodes of this node are recursively copied.

        Additional keyword arguments may be passed to customize the
        copying process. For instance, title and filters may be changed,
        user attributes may be or may not be copied, data may be sub-sampled,
        stats may be collected, etc. Arguments unknown to nodes are simply
        ignored. Check the documentation for copying operations of nodes to
        see which options they support.

        Two keyword arguments are also used by this method itself:

        use_hardlinks
            If true, a child whose object info reports a reference count
            greater than one, and whose address has already been copied,
            is re-created as a hard link to the first copy instead of
            being copied again.  Copied addresses are tracked in the
            ``address_map`` keyword argument, which is created if missing.
        stats
            If given, its ``'hardlinks'`` entry is incremented (starting
            from 0 if absent) for every hard link created.

        """

        self._g_check_open()

        # `dstgroup` is used instead of its path to avoid accepting
        # `Node` objects when `createparents` is true.  Also, note that
        # there is no risk of creating parent nodes and failing later
        # because of destination nodes already existing.
        dstparent = self._v_file._get_or_create_path(dstgroup, createparents)
        self._g_check_group(dstparent)  # Is it a group?

        if not overwrite:
            # Abort as early as possible when destination nodes exist
            # and overwriting is not enabled.
            for childname in self._v_children:
                if childname in dstparent:
                    raise NodeError(
                        "destination group ``%s`` already has "
                        "a node named ``%s``; "
                        "you may want to use the ``overwrite`` argument" %
                        (dstparent._v_pathname, childname))

        use_hardlinks = kwargs.get('use_hardlinks', False)
        if use_hardlinks:
            address_map = kwargs.setdefault('address_map', {})
            # ``stats`` is only read, never popped: it has to stay in
            # `kwargs` so that the copies of the remaining children keep
            # updating it.
            stats = kwargs.get('stats', None)

            for child in self._v_children.itervalues():
                addr, rc = child._get_obj_info()
                if rc > 1 and addr in address_map:
                    # Already copied once: link to the first copy.
                    where, name = address_map[addr][0]
                    # Node paths always use '/' as separator, so they
                    # must not be built with the platform-dependent
                    # ``os.path.join()``.
                    localsrc = join_path(where, name)
                    dstparent._v_file.create_hard_link(dstparent, child.name,
                                                       localsrc)
                    address_map[addr].append(
                        (dstparent._v_pathname, child.name))

                    # Update statistics if needed.
                    if stats is not None:
                        stats['hardlinks'] = stats.get('hardlinks', 0) + 1
                else:
                    child._f_copy(dstparent, None, overwrite, recursive,
                                  **kwargs)
                    if rc > 1:
                        # First copy of a multiply-referenced object:
                        # remember where it went for later links.
                        address_map[addr] = [(dstparent._v_pathname,
                                              child.name)]
        else:
            for child in self._v_children.itervalues():
                child._f_copy(dstparent, None, overwrite, recursive, **kwargs)

    _f_copyChildren = previous_api(_f_copy_children)

    def __str__(self):
        """Return a short string representation of the group.

        Examples
        --------

        ::

            >>> f=tables.open_file('data/test.h5')
            >>> print(f.root.group0)
            /group0 (Group) 'First Group'

        """

        pathname = self._v_pathname
        classname = self.__class__.__name__
        title = self._v_title
        return "%s (%s) %r" % (pathname, classname, title)

    def __repr__(self):
        """Return a detailed string representation of the group.

        Examples
        --------

        ::

            >>> f = tables.open_file('data/test.h5')
            >>> f.root.group0
            /group0 (Group) 'First Group'
              children := ['tuple1' (Table), 'group1' (Group)]

        """

        rep = [
            '%r (%s)' % (childname, child.__class__.__name__)
            for (childname, child) in self._v_children.iteritems()
        ]
        childlist = '[%s]' % (', '.join(rep))

        return "%s\n  children := %s" % (str(self), childlist)
Ejemplo n.º 14
0
class RootGroup(Group):
    """Group placed at the root (``/``) of the object tree of a file.

    The constructor below sets up the node by hand and does not call the
    constructor of `Group`.  The root group refuses to be renamed, moved
    or removed.

    """

    # NOTE(review): looks like a camelCase alias of ``_v_objectid`` kept for
    # the previous API -- confirm with ``previous_api_property``.
    _v_objectId = previous_api_property('_v_objectid')

    def __init__(self, ptfile, name, title, new, filters):
        """Create the root group of the file `ptfile`.

        `title` and `filters` are only kept (as ``_v_new_title`` and
        ``_v_new_filters``) when `new` is true; otherwise both attributes
        are set to ``None``.  The node is registered in the node manager
        of the file under ``'/'`` and then opened to get its object ID.

        """

        mydict = self.__dict__

        # Set group attributes.
        self._v_version = obversion
        self._v_new = new
        if new:
            self._v_new_title = title
            self._v_new_filters = filters
        else:
            self._v_new_title = None
            self._v_new_filters = None

        # Set node attributes.
        self._v_file = ptfile
        self._v_isopen = True  # root is always open
        self._v_pathname = '/'
        self._v_name = '/'
        self._v_depth = 0
        self._v_max_group_width = ptfile.params['MAX_GROUP_WIDTH']
        self._v__deleting = False
        self._v_objectid = None  # later

        # Only the root node has the file as a parent.
        # Bypass __setattr__ to avoid the ``Node._v_parent`` property.
        mydict['_v_parent'] = ptfile
        ptfile._node_manager.register_node(self, '/')

        # hdf5extension operations (do before setting an AttributeSet):
        #   Update node attributes.
        self._g_new(ptfile, name, init=True)
        #   Open the node and get its object ID.
        self._v_objectid = self._g_open()

        # Set disk attributes and read children names.
        #
        # This *must* be postponed because this method needs the root node
        # to be created and bound to ``File.root``.
        # This is an exception to the rule, handled by ``File.__init__()``.
        #
        # self._g_post_init_hook()

    def _g_load_child(self, childname):
        """Load a child node from disk.

        The child node `childname` is loaded from disk and an adequate
        `Node` object is created and returned.  If there is no such
        child, a `NoSuchNodeError` is raised.

        When the ``root_uep`` of the file is not ``"/"``, `childname` is
        first joined to it.  A leaf that fails to be built is reported
        with a warning and returned as an `UnImplemented` node.

        """

        # The name is made relative to the ``root_uep`` of the file when
        # that is not the plain "/".
        if self._v_file.root_uep != "/":
            childname = join_path(self._v_file.root_uep, childname)
        # Is the node a group or a leaf?
        node_type = self._g_check_has_child(childname)

        # Nodes that HDF5 report as H5G_UNKNOWN
        if node_type == 'Unknown':
            return Unknown(self, childname)

        # Guess the PyTables class suited to the node,
        # build a PyTables node and return it.
        if node_type == "Group":
            if self._v_file.params['PYTABLES_SYS_ATTRS']:
                ChildClass = self._g_get_child_group_class(childname)
            else:
                # Default is a Group class
                ChildClass = Group
            return ChildClass(self, childname, new=False)
        elif node_type == "Leaf":
            ChildClass = self._g_get_child_leaf_class(childname, warn=True)
            # Building a leaf may still fail because of unsupported types
            # and other causes.
            # return ChildClass(self, childname)  # uncomment for debugging
            try:
                return ChildClass(self, childname)
            except Exception as exc:  # XXX
                # Best effort: any failure is turned into a warning and
                # the node is still returned, as an ``UnImplemented`` one.
                warnings.warn(
                    "problems loading leaf ``%s``::\n\n"
                    "  %s\n\n"
                    "The leaf will become an ``UnImplemented`` node." %
                    (self._g_join(childname), exc))
                # If not, associate an UnImplemented object to it
                return UnImplemented(self, childname)
        elif node_type == "SoftLink":
            return SoftLink(self, childname)
        elif node_type == "ExternalLink":
            return ExternalLink(self, childname)
        else:
            # Any other node type is mapped to ``UnImplemented`` too.
            return UnImplemented(self, childname)

    # NOTE(review): presumably the camelCase alias of ``_g_load_child`` kept
    # for the previous API -- confirm with ``previous_api``.
    _g_loadChild = previous_api(_g_load_child)

    def _f_rename(self, newname):
        """Always raise a `NodeError`: the root node can not be renamed."""
        raise NodeError("the root node can not be renamed")

    def _f_move(self, newparent=None, newname=None, createparents=False):
        """Always raise a `NodeError`: the root node can not be moved."""
        raise NodeError("the root node can not be moved")

    def _f_remove(self, recursive=False):
        """Always raise a `NodeError`: the root node can not be removed."""
        raise NodeError("the root node can not be removed")
Ejemplo n.º 15
0
class UnImplemented(hdf5extension.UnImplemented, Leaf):
    """This class represents datasets not supported by PyTables in an
    HDF5 file.

    A generic HDF5 file (i.e. one created with some other HDF5 library
    based tool instead of PyTables) may hold datasets whose specific
    combination of datatypes or dataspaces is not supported by PyTables
    yet. Such a dataset is mapped into an UnImplemented instance, so that
    the user is still able to access the complete object tree of the
    generic HDF5 file. The user is also able to *read and write the
    attributes* of the dataset, *access some of its metadata*, and perform
    *certain hierarchy manipulation operations* like deleting or moving
    (but not copying) the node. Of course, the actual data in the dataset
    can not be read.

    This is an elegant way to let users work with generic HDF5 files even
    if some of their datasets are not supported by PyTables. However, if
    you are really interested in having full access to an unimplemented
    dataset, please get in contact with the developer team.

    This class does not have any public instance variables or methods, except
    those inherited from the Leaf class (see :ref:`LeafClassDescr`).

    """

    # Class identifier.
    _c_classid = 'UNIMPLEMENTED'

    _c_classId = previous_api_property('_c_classid')

    def __init__(self, parentnode, name):
        """Create the `UnImplemented` instance."""

        # An UnImplemented object is never created anew: it always comes
        # from opening a node that already exists.
        self._v_new = False
        """Is this the first time the node has been created?"""
        self.nrows = SizeType(0)
        """The length of the first dimension of the data."""
        self.shape = (SizeType(0), )
        """The shape of the stored data."""
        self.byteorder = None
        """The endianness of data in memory ('big', 'little' or
        'irrelevant')."""

        super(UnImplemented, self).__init__(parentnode, name)

    def _g_open(self):
        """Open the node, set its shape information and return its ID."""

        opened = self._open_unimplemented()
        (self.shape, self.byteorder, object_id) = opened
        try:
            self.nrows = SizeType(self.shape[0])
        except IndexError:
            # A shape with no dimensions has no first one to take.
            self.nrows = SizeType(0)
        return object_id

    def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs):
        """Do nothing.

        Nothing is copied, but a ``UserWarning`` is issued.  Please note
        that *no new node is returned* by this method, but ``None``.

        """

        message = (
            "UnImplemented node %r does not know how to copy itself; skipping"
            % (self._v_pathname, ))
        warnings.warn(message)
        return None  # Can you see it?

    def _f_copy(self,
                newparent=None,
                newname=None,
                overwrite=False,
                recursive=False,
                createparents=False,
                **kwargs):
        """Do nothing.

        `UnImplemented` nodes can not be copied, so nothing is done here
        apart from issuing a ``UserWarning``.  Please note that *no new
        node is returned* by this method, but ``None``.

        """

        # The only effect of this call is the warning.
        self._g_copy(newparent, newname, recursive, **kwargs)
        return None  # Can you see it?

    def __repr__(self):
        """Return the short representation followed by an explanation."""

        template = """%s
  NOTE: <The UnImplemented object represents a PyTables unimplemented
         dataset present in the '%s' HDF5 file.  If you want to see this
         kind of HDF5 dataset implemented in PyTables, please contact the
         developers.>
"""
        return template % (str(self), self._v_file.filename)
Ejemplo n.º 16
0
class OldIndexArray(UnImplemented):
    """`UnImplemented` node whose class identifier is ``'IndexArray'``.

    NOTE(review): presumably this stands for index arrays stored in an
    old format that is no longer supported -- confirm against the code
    that maps class identifiers to node classes.

    """

    # Class identifier.
    _c_classid = 'IndexArray'

    # NOTE(review): looks like a camelCase alias of ``_c_classid`` kept for
    # the previous API -- confirm with ``previous_api_property``.
    _c_classId = previous_api_property('_c_classid')
Ejemplo n.º 17
0
class ProxyDict(dict):
    """A dictionary which uses a container object to store its values.

    Only the keys are kept in the dictionary itself (every stored value
    is ``None``).  Values are fetched on demand by calling
    ``_get_value_from_container(container, key)``, which is not defined
    in this class and must be provided by subclasses.  The container is
    only weakly referenced; using the dictionary once the container is
    gone raises a `ValueError`.

    """

    containerRef = previous_api_property('containerref')

    def __init__(self, container):
        self.containerref = weakref.ref(container)
        """A weak reference to the container object.

        .. versionchanged:: 3.0
           The *containerRef* attribute has been renamed into
           *containerref*.

        """

    def __getitem__(self, key):
        """Return the value that the container holds for `key`.

        A `KeyError` is raised if `key` is not in the dictionary.

        """

        if key not in self:
            raise KeyError(key)

        # Values are not actually stored to avoid extra references.
        return self._get_value_from_container(self._get_container(), key)

    def __setitem__(self, key, value):
        """Register `key` in the dictionary; `value` is not stored."""

        # Values are not actually stored to avoid extra references.
        super(ProxyDict, self).__setitem__(key, None)

    def __repr__(self):
        """Return the default object representation (values not shown)."""

        return object.__repr__(self)

    def __str__(self):
        """Return a dict-like string with the values from the container."""

        # C implementation does not use `self.__getitem__()`. :(
        item_reprs = ['%r: %r' % item for item in self.iteritems()]
        return '{%s}' % ', '.join(item_reprs)

    def values(self):
        """Return a list with the values fetched from the container."""

        # C implementation does not use `self.__getitem__()`. :(
        return [self[key] for key in self.iterkeys()]

    def itervalues(self):
        """Iterate over the values fetched from the container."""

        # C implementation does not use `self.__getitem__()`. :(
        #
        # The generator must finish by simply falling off its end: an
        # explicit ``raise StopIteration`` inside a generator is turned
        # into a ``RuntimeError`` by Python 3.7+ (PEP 479).
        for key in self.iterkeys():
            yield self[key]

    def items(self):
        """Return a list of ``(key, value)`` pairs, with the values
        fetched from the container."""

        # C implementation does not use `self.__getitem__()`. :(
        return [(key, self[key]) for key in self.iterkeys()]

    def iteritems(self):
        """Iterate over ``(key, value)`` pairs, with the values fetched
        from the container."""

        # C implementation does not use `self.__getitem__()`. :(
        #
        # As in ``itervalues()``, no explicit ``raise StopIteration`` is
        # used to end the generator (PEP 479).
        for key in self.iterkeys():
            yield (key, self[key])

    def _get_container(self):
        """Return the container object.

        A `ValueError` is raised if the weakly referenced container does
        not exist anymore.

        """

        container = self.containerref()
        if container is None:
            raise ValueError("the container object does no longer exist")
        return container

    _getContainer = previous_api(_get_container)
Ejemplo n.º 18
0
class SoftLink(linkextension.SoftLink, Link):
    """Represents a soft link (aka symbolic link).

    A soft link is a reference to another node in the *same* file hierarchy.
    Provided that the target node exists, its attributes and methods can be
    accessed directly from the softlink using the normal `.` syntax.

    Softlinks also have the following public methods/attributes:

        * `target`
        * `dereference()`
        * `copy()`
        * `move()`
        * `remove()`
        * `rename()`
        * `is_dangling()`

    Note that these will override any correspondingly named methods/attributes
    of the target node.

    For backwards compatibility, it is also possible to obtain the target node
    via the `__call__()` special method (this action is called *dereferencing*;
    see below)

    Examples
    --------

    ::

        >>> f = tables.open_file('/tmp/test_softlink.h5', 'w')
        >>> a = f.create_array('/', 'A', np.arange(10))
        >>> link_a = f.create_soft_link('/', 'link_A', target='/A')

        # transparent read/write access to a softlinked node
        >>> link_a[0] = -1
        >>> print(link_a[:], link_a.dtype)
        (array([-1,  1,  2,  3,  4,  5,  6,  7,  8,  9]), dtype('int64'))

        # dereferencing a softlink using the __call__() method
        >>> print(link_a() is a)
        True

        # SoftLink.remove() overrides Array.remove()
        >>> link_a.remove()
        >>> print(link_a)
        <closed tables.link.SoftLink at 0x7febe97186e0>
        >>> print(a[:], a.dtype)
        (array([-1,  1,  2,  3,  4,  5,  6,  7,  8,  9]), dtype('int64'))


    """

    # Class identifier.
    _c_classid = 'SOFTLINK'

    _c_classId = previous_api_property('_c_classid')

    # attributes with these names/prefixes are treated as attributes of the
    # SoftLink rather than the target node
    _link_attrnames = ('target', 'dereference', 'is_dangling', 'copy', 'move',
                       'remove', 'rename', '__init__', '__str__', '__repr__',
                       '__class__', '__dict__')
    _link_attrprefixes = ('_f_', '_c_', '_g_', '_v_')

    def __call__(self):
        """Dereference `self.target` and return the object.

        Examples
        --------

        ::

            >>> f=tables.open_file('data/test.h5')
            >>> print(f.root.link0)
            /link0 (SoftLink) -> /another/path
            >>> print(f.root.link0())
            /another/path (Group) ''

        """
        return self.dereference()

    def dereference(self):
        """Return the node pointed to by the link.

        A relative `target` is resolved against the parent group of the
        link.  ``None`` is returned when the link itself is closed.

        """

        if self._v_isopen:
            target = self.target
            # Check for relative pathnames
            if not self.target.startswith('/'):
                target = self._v_parent._g_join(self.target)
            return self._v_file._get_node(target)
        else:
            return None

    def __getattribute__(self, attrname):
        """Return an attribute of the link itself or of its target node.

        Names listed in `_link_attrnames`, or starting with one of the
        `_link_attrprefixes`, are looked up on the link.  Any other name
        is forwarded to the dereferenced target; ``None`` is returned
        when the link is dangling.

        Raises
        ------
        tables.ClosedNodeError
            If a forwarded attribute is requested on a closed link.

        """

        # get attribute of the SoftLink itself
        if (attrname in SoftLink._link_attrnames
                or attrname[:3] in SoftLink._link_attrprefixes):
            return object.__getattribute__(self, attrname)

        # get attribute of the target node
        elif not self._v_isopen:
            raise tables.ClosedNodeError('the node object is closed')
        elif self.is_dangling():
            return None
        else:
            # getattr() first tries __getattribute__() and then falls back
            # to __getattr__() *only if the class defines it* (Groups need
            # the latter to resolve their children).  Calling __getattr__()
            # by hand reported a misleading "no attribute '__getattr__'"
            # error for node classes that do not implement it (e.g. Array).
            return getattr(self.dereference(), attrname)

    def __setattr__(self, attrname, value):
        """Set an attribute of the link itself or of its target node.

        The same name-based dispatch as in `__getattribute__()` is used.

        Raises
        ------
        tables.ClosedNodeError
            If a forwarded attribute is set on a closed link.
        ValueError
            If a forwarded attribute is set on a dangling link.

        """

        # set attribute of the SoftLink itself
        if (attrname in SoftLink._link_attrnames
                or attrname[:3] in SoftLink._link_attrprefixes):
            object.__setattr__(self, attrname, value)

        # set attribute of the target node
        elif not self._v_isopen:
            raise tables.ClosedNodeError('the node object is closed')
        elif self.is_dangling():
            raise ValueError("softlink target does not exist")
        else:
            setattr(self.dereference(), attrname, value)

    def __getitem__(self, key):
        """__getitem__ must be defined in the SoftLink class in order for array
        indexing syntax to work"""

        if not self._v_isopen:
            raise tables.ClosedNodeError('the node object is closed')
        elif self.is_dangling():
            raise ValueError("softlink target does not exist")
        else:
            return self.dereference().__getitem__(key)

    def __setitem__(self, key, value):
        """__setitem__ must be defined in the SoftLink class in order for array
        indexing syntax to work"""

        if not self._v_isopen:
            raise tables.ClosedNodeError('the node object is closed')
        elif self.is_dangling():
            raise ValueError("softlink target does not exist")
        else:
            self.dereference().__setitem__(key, value)

    def is_dangling(self):
        """Return True if the dereferenced target is not in the file."""

        return self.dereference() not in self._v_file

    def __str__(self):
        """Return a short string representation of the link.

        Examples
        --------

        ::

            >>> f=tables.open_file('data/test.h5')
            >>> print(f.root.link0)
            /link0 (SoftLink) -> /path/to/node

        """

        classname = self.__class__.__name__
        target = str(self.target)
        # Check for relative pathnames
        if not self.target.startswith('/'):
            target = self._v_parent._g_join(self.target)
        if self._v_isopen:
            closed = ""
        else:
            closed = "closed "
        # The resolved path is only used for the membership test; the
        # target is printed exactly as it was given.
        if target not in self._v_file:
            dangling = " (dangling)"
        else:
            dangling = ""
        return "%s%s (%s) -> %s%s" % (closed, self._v_pathname, classname,
                                      self.target, dangling)
Ejemplo n.º 19
0
class SoftLink(linkextension.SoftLink, Link):
    """Represents a soft link (aka symbolic link).

    A soft link points to another node living in the *same* file
    hierarchy.  Calling the link object (the ``__call__`` special method,
    see below) *dereferences* it, i.e. returns the node it points to.

    """

    # Class identifier.
    _c_classid = 'SOFTLINK'

    _c_classId = previous_api_property('_c_classid')

    def __call__(self):
        """Dereference `self.target` and return the pointed node.

        A relative target is joined to the parent group of the link
        before the node is requested from the file.

        Examples
        --------

        ::

            >>> f=tables.open_file('data/test.h5')
            >>> print(f.root.link0)
            /link0 (SoftLink) -> /another/path
            >>> print(f.root.link0())
            /another/path (Group) ''

        """

        nodepath = self.target
        is_absolute = self.target.startswith('/')
        if not is_absolute:
            # Relative pathnames hang from the parent of the link
            nodepath = self._v_parent._g_join(self.target)
        return self._v_file._get_node(nodepath)

    def __str__(self):
        """Return a short string representation of the link.

        The target is printed as given; ``(dangling)`` is appended when
        the resolved target path is not found in the file.

        Examples
        --------

        ::

            >>> f=tables.open_file('data/test.h5')
            >>> print(f.root.link0)
            /link0 (SoftLink) -> /path/to/node

        """

        classname = self.__class__.__name__
        # Resolve relative pathnames against the parent of the link
        if self.target.startswith('/'):
            resolved = self.target
        else:
            resolved = self._v_parent._g_join(self.target)
        dangling = "" if resolved in self._v_file else " (dangling)"
        fields = (self._v_pathname, classname, self.target, dangling)
        return "%s (%s) -> %s%s" % fields
Ejemplo n.º 20
0
class IndexArray(NotLoggedMixin, EArray, indexesextension.IndexArray):
    """Represent the index (sorted or reverse index) dataset in HDF5 file.

    All NumPy typecodes are supported except for complex datatypes.

    Parameters
    ----------
    parentnode
        The Index class from which this object will hang off.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An Atom object representing the shape and type of the atomic objects to
        be saved. Only scalar atoms are supported.
    title
        Sets a TITLE attribute on the array entity.
    filters : Filters
        An instance of the Filters class that provides information about the
        desired I/O filters to be applied during the life of this object.
    byteorder
        The byte order of the data on-disk.

    """

    # Class identifier.
    _c_classid = 'INDEXARRAY'

    _c_classId = previous_api_property('_c_classid')

    # Properties
    # ~~~~~~~~~~
    # Both properties are read-only views of the second dimension of the
    # array: `chunksize` of its chunkshape and `slicesize` of its shape.
    chunksize = property(lambda self: self.chunkshape[1], None, None,
                         """The chunksize for this object.""")

    slicesize = property(lambda self: self.shape[1], None, None,
                         """The slicesize for this object.""")

    # Other methods
    # ~~~~~~~~~~~~~
    def __init__(self,
                 parentnode,
                 name,
                 atom=None,
                 title="",
                 filters=None,
                 byteorder=None):
        """Create an IndexArray instance."""

        self._v_pathname = parentnode._g_join(name)
        if atom is not None:
            # The shape and chunkshape needs to be fixed here
            if name == "sorted":
                # The array named "sorted" has its second dimension divided
                # by the `reduction` attribute of the parent.
                reduction = parentnode.reduction
                shape = (0, parentnode.slicesize // reduction)
                chunkshape = (1, parentnode.chunksize // reduction)
            else:
                shape = (0, parentnode.slicesize)
                chunkshape = (1, parentnode.chunksize)
        else:
            # The shape and chunkshape will be read from disk later on
            shape = None
            chunkshape = None

        super(IndexArray, self).__init__(parentnode,
                                         name,
                                         atom,
                                         shape,
                                         title,
                                         filters,
                                         chunkshape=chunkshape,
                                         byteorder=byteorder)

    # This version of searchBin uses both ranges (1st level) and
    # bounds (2nd level) caches. It uses a cache for boundary rows,
    # but not for 'sorted' rows (this is only supported for the
    # 'optimized' types).
    def _search_bin(self, nrow, item):
        """Return a ``(result1, result2)`` pair of positions in row `nrow`.

        `item` is unpacked as ``(item1, item2)``.  Each position is first
        resolved against the cached range of the row (``rvcache`` of the
        parent), which yields either 0 or ``self.shape[1]``.  Positions not
        resolved that way are found by bisecting the bounds of the row and
        then one chunk of the sorted slice.

        """

        item1, item2 = item
        # -1 marks a position that has not been resolved yet.
        result1 = -1
        result2 = -1
        hi = self.shape[1]
        ranges = self._v_parent.rvcache
        boundscache = self.boundscache
        # First, look at the beginning of the slice
        begin = ranges[nrow, 0]
        # Look for items at the beginning of sorted slices
        if item1 <= begin:
            result1 = 0
        if item2 < begin:
            result2 = 0
        if result1 >= 0 and result2 >= 0:
            return (result1, result2)
        # Then, look for items at the end of the sorted slice
        end = ranges[nrow, 1]
        if result1 < 0:
            if item1 > end:
                result1 = hi
        if result2 < 0:
            if item2 >= end:
                result2 = hi
        if result1 >= 0 and result2 >= 0:
            return (result1, result2)
        # Finally, do a lookup for item1 and item2 if they were not found
        # Lookup in the middle of slice for item1
        chunksize = self.chunksize  # Number of elements/chunksize
        # -1 means "no chunk read so far".  NOTE(review): this relies on the
        # bisection below never returning -1 -- presumably it is always
        # non-negative; confirm against `bisect_right`.
        nchunk = -1
        # Try to get the bounds row from the LRU cache
        nslot = boundscache.getslot(nrow)
        if nslot >= 0:
            # Cache hit. Use the row kept there.
            bounds = boundscache.getitem(nslot)
        else:
            # No luck with cached data. Read the row and put it in the cache.
            bounds = self._v_parent.bounds[nrow]
            size = bounds.size * bounds.itemsize
            boundscache.setitem(nrow, bounds, size)
        if result1 < 0:
            # Search the appropriate chunk in bounds cache
            nchunk = bisect_left(bounds, item1)
            chunk = self._read_sorted_slice(nrow, chunksize * nchunk,
                                            chunksize * (nchunk + 1))
            result1 = self._bisect_left(chunk, item1, chunksize)
            # Turn the position inside the chunk into a position in the row
            result1 += chunksize * nchunk
        # Lookup in the middle of slice for item2
        if result2 < 0:
            # Search the appropriate chunk in bounds cache
            nchunk2 = bisect_right(bounds, item2)
            # The chunk read for item1 (if any) is reused when item2 falls
            # in the same chunk; otherwise a new chunk is read.
            if nchunk2 != nchunk:
                chunk = self._read_sorted_slice(nrow, chunksize * nchunk2,
                                                chunksize * (nchunk2 + 1))
            result2 = self._bisect_right(chunk, item2, chunksize)
            # Turn the position inside the chunk into a position in the row
            result2 += chunksize * nchunk2
        return (result1, result2)

    # Exposes `_search_bin` under the camelCase name `_searchBin`.
    # NOTE(review): presumably a backward-compatibility alias --
    # `previous_api` is defined elsewhere; confirm there.
    _searchBin = previous_api(_search_bin)

    def __str__(self):
        """Return a compact representation of this class."""
        return "IndexArray(path=%s)" % self._v_pathname

    def __repr__(self):
        """Return a verbose representation of this class."""

        # The first line is the compact `str()` form of the object; the
        # remaining lines list its main attributes.
        return """%s
  atom = %r
  shape = %s
  nrows = %s
  chunksize = %s
  slicesize = %s
  byteorder = %r""" % (self, self.atom, self.shape, self.nrows, self.chunksize,
                       self.slicesize, self.byteorder)
Ejemplo n.º 21
0
class ExternalLink(linkextension.ExternalLink, Link):
    """Represents an external link.

    An external link is a reference to a node in *another* file.
    Getting access to the pointed node (this action is called
    *dereferencing*) is done via the :meth:`__call__` special method
    (see below).

    .. rubric:: ExternalLink attributes

    .. attribute:: extfile

        The external file handler, if the link has been dereferenced.
        In case the link has not been dereferenced yet, its value is
        None.

    """

    # Class identifier.
    _c_classid = 'EXTERNALLINK'

    _c_classId = previous_api_property('_c_classid')

    def __init__(self, parentnode, name, target=None, _log=False):
        self.extfile = None
        """The external file handler, if the link has been dereferenced.
        In case the link has not been dereferenced yet, its value is
        None."""
        super(ExternalLink, self).__init__(parentnode, name, target, _log)

    def _get_filename_node(self):
        """Return the external filename and nodepath from `self.target`.

        The target is split on ``':/'``; a ``ValueError`` is raised by the
        unpacking if that does not yield exactly two parts.

        """

        # This is needed for avoiding the 'C:\\file.h5' filepath notation
        filename, target = self.target.split(':/')
        return filename, '/' + target

    def __call__(self, **kwargs):
        """Dereference self.target and return the object.

        You can pass all the arguments supported by the :func:`open_file`
        function (except filename, of course) so as to open the referenced
        external file.  They must be passed as keyword arguments.

        If the external file is already open, it is reused.  In that case
        an ``AssertionError`` is raised when its filename, or its mode, do
        not match the requested ones (the requested mode defaults to
        ``'r'``).

        Examples
        --------

        ::

            >>> f=tables.open_file('data1/test1.h5')
            >>> print(f.root.link2)
            /link2 (ExternalLink) -> data2/test2.h5:/path/to/node
            >>> plink2 = f.root.link2(mode='a')  # open in 'a'ppend mode
            >>> print(plink2)
            /path/to/node (Group) ''
            >>> print(plink2._v_filename)
            'data2/test2.h5'        # belongs to referenced file

        """

        filename, target = self._get_filename_node()

        if not os.path.isabs(filename):
            # Resolve the external link with respect to the this
            # file's directory.  See #306.
            base_directory = os.path.dirname(self._v_file.filename)
            filename = os.path.join(base_directory, filename)

        if self.extfile is None or not self.extfile.isopen:
            self.extfile = tables.open_file(filename, **kwargs)
        else:
            # XXX: implement better consistency checks
            # The checks are explicit (instead of ``assert`` statements) so
            # that they are still enforced under ``python -O``.  The
            # exception type is kept for backward compatibility.
            if self.extfile.filename != filename:
                raise AssertionError(
                    "the external file is already open with a different "
                    "filename: %r != %r" % (self.extfile.filename, filename))
            mode = kwargs.get('mode', 'r')
            if self.extfile.mode != mode:
                raise AssertionError(
                    "the external file is already open with a different "
                    "mode: %r != %r" % (self.extfile.mode, mode))

        return self.extfile._get_node(target)

    def umount(self):
        """Safely unmount self.extfile, if opened."""

        extfile = self.extfile
        # Close external file, if open
        if extfile is not None and extfile.isopen:
            extfile.close()
            self.extfile = None

    def _f_close(self):
        """Specific close for external links.

        The external file (if any) is unmounted before closing the link.

        """

        self.umount()
        super(ExternalLink, self)._f_close()

    def __str__(self):
        """Return a short string representation of the link.

        Examples
        --------

        ::

            >>> f=tables.open_file('data1/test1.h5')
            >>> print(f.root.link2)
            /link2 (ExternalLink) -> data2/test2.h5:/path/to/node

        """

        classname = self.__class__.__name__
        return "%s (%s) -> %s" % (self._v_pathname, classname, self.target)