コード例 #1
0
ファイル: unimplemented.py プロジェクト: timburgess/PyTables
 def _g_open(self):
     """Open the on-disk object and cache its basic metadata.

     The shape and byteorder returned by ``_open_unimplemented()`` are
     stored on the instance, ``nrows`` is taken from the first dimension
     of the shape (0 when the shape has no dimensions at all) and the
     object identifier is returned.
     """
     opened = self._open_unimplemented()
     self.shape, self.byteorder, object_id = opened
     try:
         first_dim = self.shape[0]
     except IndexError:
         # Empty shape: there is no first dimension to count rows on.
         first_dim = 0
     self.nrows = SizeType(first_dim)
     return object_id
コード例 #2
0
    def _calc_chunkshape(self, expectedrows):
        """Return the one-dimensional HDF5 chunk shape for this array.

        The expected size in MiB is estimated from `expectedrows` and the
        size of a single base element, and handed to ``calc_chunksize()``.
        The result is a 1-tuple holding at least one element per chunk.
        """
        # For HDF5 VL types the chunkshape has to be computed from the
        # itemsize of *each* element of the atom and not from the size of
        # the entire atom (F. Alted 2006-11-23).  That is why
        # ``self._basesize`` is used here instead of
        # ``self.atom.atomsize()``.
        base_size = self._basesize

        # AV 2013-05-03
        # Quick workaround that allows changing the API for the
        # PyTables 3.0 release and removing the expected_mb parameter.
        # The algorithm for computing the chunkshape should be rewritten
        # as requested by gh-35.
        mebibyte = 1024.**2
        target_bytes = calc_chunksize(expectedrows * base_size / mebibyte)

        nelements = target_bytes // base_size
        # Safeguard against itemsizes being extremely large: never hand
        # back an empty chunk.
        return (SizeType(1 if nelements == 0 else nelements), )
コード例 #3
0
ファイル: unimplemented.py プロジェクト: timburgess/PyTables
    def __init__(self, parentnode, name):
        """Set up an `UnImplemented` node called `name` under `parentnode`.

        Only placeholder metadata is stored here before delegating to the
        parent class initialiser.
        """
        # The shape of the stored data.
        self.shape = (SizeType(0), )
        # The length of the first dimension of the data.
        self.nrows = SizeType(0)
        # The endianness of data in memory ('big', 'little' or
        # 'irrelevant').
        self.byteorder = None
        # UnImplemented objects always come from opening an existing node
        # (they can not be created), so this is never a new node.
        self._v_new = False

        super(UnImplemented, self).__init__(parentnode, name)
コード例 #4
0
    def _g_copyWithStats(self, group, name, start, stop, step,
                         title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        A new `VLArray` called `name` is created under `group` and the
        rows selected by `start`, `stop` and `step` are appended to it one
        by one.  Returns a ``(new_vlarray, nbytes)`` tuple, where `nbytes`
        is the number of copied objects multiplied by the atom size.
        """
        # Destination node, built with this node's atom and size hint.
        target = VLArray(
            group, name, self.atom, title=title, filters=filters,
            expectedsizeinMB=self._v_expectedsizeinMB, chunkshape=chunkshape,
            _log=_log)

        # The copy is not buffered because the length of each record
        # cannot be foreseen, so the safest approach is a row by row copy.
        # In the future, some analysis could be done in order to buffer
        # the copy process.
        nrowsinbuf = 1
        (start, stop, step) = self._processRangeRead(start, stop, step)
        stride = step * nrowsinbuf

        # Pseudo-atoms have no ``size`` of their own; use their base atom.
        if hasattr(self.atom, 'size'):
            atomsize = self.atom.size
        else:
            atomsize = self.atom.base.size

        # Optimized version (no conversions, no type and shape checks...)
        copied = SizeType(0)
        nbytes = 0
        for first in lrange(start, stop, stride):
            # Never read past the requested stop.
            last = min(first + stride, stop)
            row = self._readArray(start=first, stop=last, step=step)[0]
            nitems = row.shape[0]
            # Save the record on disk
            target._append(row, nitems)
            nbytes += nitems * atomsize
            copied += 1
        target.nrows = copied
        return (target, nbytes)
コード例 #5
0
    def _initLoop(self):
        """Reset the iteration state used by the ``__iter__`` iterator."""
        first = self._start
        self._nrowsread = first
        self._startb = first
        # Sentinels
        self._row = -1
        self._init = True
        # Row number: placed one step before the starting row.
        self.nrow = SizeType(first - self._step)
コード例 #6
0
    def _calc_chunkshape(self, expectedrows, rowsize, itemsize):
        """Return the HDF5 chunk shape as a tuple of `SizeType` values.

        The chunk size obtained from ``calc_chunksize()`` is converted
        into a budget of items, and the dimensions of ``self.shape`` are
        trimmed until their product fits within that budget.
        """
        # A scalar shape always gets the unit chunk.
        if self.shape == ():
            return (SizeType(1), )

        MB = 1024 * 1024
        chunksize = calc_chunksize((expectedrows * rowsize) / MB)

        maindim = self.maindim
        # Number of items that fit in one chunk (at least one, as a
        # safeguard against extremely large itemsizes).
        budget = chunksize // itemsize
        if budget == 0:
            budget = 1

        dims = list(self.shape)
        # First attempt: trim only the main dimension.
        dims[maindim] = 1
        rest = numpy.prod(dims, dtype=SizeType)
        if rest <= budget:
            dims[maindim] = budget // rest
            return tuple(SizeType(d) for d in dims)

        # Not enough, so trim the other dimensions one after another.
        for axis in xrange(len(dims)):
            dims[axis] = 1
            rest = numpy.prod(dims, dtype=SizeType)
            if rest <= budget:
                dims[axis] = budget // rest
                break
        else:
            # The loop finished without a break: give the whole budget to
            # the last dimension.
            dims[-1] = budget

        return tuple(SizeType(d) for d in dims)
コード例 #7
0
    def _g_create(self):
        """Save a new array in file.

        The object held in ``self._object`` is converted to the internal
        array representation, its shape is recorded, its byteorder is
        fixed and the array is created on disk.  The node is closed before
        re-raising if either the conversion or the on-disk creation fails.

        Returns the identifier stored in ``self._v_objectID``.

        Raises `TypeError` if the converted array has a void, unicode or
        object dtype.
        """

        self._v_version = obversion
        try:
            # `Leaf._g_postInitHook()` should be setting the flavor on disk.
            self._flavor = flavor = flavor_of(self._object)
            nparr = array_as_internal(self._object, flavor)
        except:  # XXX
            # Problems converting data. Close the node and re-raise exception.
            # The bare ``except`` is kept on purpose: the exception is always
            # re-raised, it is only used to clean up first.
            self.close(flush=0)
            raise

        # Raise an error in case of unsupported object
        if nparr.dtype.kind in ('V', 'U', 'O'):  # in void, unicode, object
            raise TypeError(
                "Array objects cannot currently deal with void, unicode or "
                "object arrays")

        # Decrease the number of references to the object
        self._object = None

        # The shape of this array
        self.shape = tuple(SizeType(s) for s in nparr.shape)

        # Fix the byteorder of data
        nparr = self._g_fix_byteorder_data(nparr, nparr.dtype.byteorder)

        # Create the array on-disk
        try:
            # ``self._v_objectID`` needs to be set because would be
            # needed for setting attributes in some descendants later
            # on
            (self._v_objectID,
             self.atom) = self._createArray(nparr, self._v_new_title)
        except:  # XXX
            # Problems creating the Array on disk. Close node and re-raise.
            self.close(flush=0)
            raise

        # Compute the optimal buffer size.  The chunkshape is only needed
        # here as an input for the buffer size computation.
        chunkshape = self._calc_chunkshape(self.nrows, self.rowsize,
                                           self.atom.itemsize)
        self.nrowsinbuf = self._calc_nrowsinbuf(chunkshape, self.rowsize,
                                                self.atom.itemsize)
        # Arrays don't have chunkshapes (so, set it to None)
        self._v_chunkshape = None

        return self._v_objectID
コード例 #8
0
    def _calc_chunkshape(self, expectedsizeinMB):
        """Return the one-dimensional HDF5 chunk shape for this array.

        `expectedsizeinMB` is passed to ``calc_chunksize()`` and the
        resulting size is divided by the size of a single base element.
        The result is a 1-tuple holding at least one element per chunk.
        """
        target_bytes = calc_chunksize(expectedsizeinMB)

        # For HDF5 VL types the chunkshape has to be computed from the
        # itemsize of *each* element of the atom and not from the size of
        # the entire atom (F. Alted 2006-11-23).  That is why
        # ``self._basesize`` is used here instead of
        # ``self.atom.atomsize()``.
        base_size = self._basesize

        nelements = target_bytes // base_size
        # Safeguard against itemsizes being extremely large: never hand
        # back an empty chunk.
        return (SizeType(1 if nelements == 0 else nelements), )
コード例 #9
0
def _normalize_shape(shape):
    """Validate `shape` and return it as a tuple of `SizeType` values.

    An integer ``N`` is accepted as a shorthand for ``(N,)`` and must be
    at least 1.  Anything else must be convertible to a tuple.

    Raises `ValueError` for an integer smaller than 1 or for more than 32
    dimensions, and `TypeError` when `shape` is neither an integer nor a
    sequence.
    """
    integer_types = (int, numpy.integer, long)
    if isinstance(shape, integer_types):
        if shape < 1:
            raise ValueError("shape value must be greater than 0: %d" % shape)
        # N is a shorthand for (N,)
        dims = (shape, )
    else:
        try:
            dims = tuple(shape)
        except TypeError:
            raise TypeError(
                "shape must be an integer or sequence: %r" % (shape, ))

    # XXX Get from HDF5 library if possible.
    # HDF5 does not support ranks greater than 32
    if len(dims) > 32:
        raise ValueError("shapes with rank > 32 are not supported: %r" %
                         (dims, ))

    return tuple(SizeType(d) for d in dims)
コード例 #10
0
ファイル: vlarray.py プロジェクト: dattatele/PyTables
    def _g_create(self):
        """Create a variable length array (ragged array).

        Caches the dtype, size, itemsize, type and shape of the atom on
        the instance, computes a chunkshape when none has been set, fixes
        the byteorder when it is not given and creates the array on disk.

        Returns the identifier stored in ``self._v_objectID``.

        Raises `ValueError` if any dimension of the atom shape is zero.
        """

        atom = self.atom
        self._v_version = obversion
        # Check for zero dims in atom shape (not allowed in VLArrays)
        zerodims = numpy.sum(numpy.array(atom.shape) == 0)
        if zerodims > 0:
            raise ValueError(
                "When creating VLArrays, none of the dimensions of the Atom "
                "instance can\nbe zero.")

        # Atoms without a ``size`` attribute are pseudo-atoms: their
        # low-level description comes from their ``base`` atom.
        is_pseudoatom = not hasattr(atom, 'size')
        if is_pseudoatom:
            self._atomicdtype = atom.base.dtype
            self._atomicsize = atom.base.size
            self._basesize = atom.base.itemsize
        else:
            self._atomicdtype = atom.dtype
            self._atomicsize = atom.size
            self._basesize = atom.itemsize
        self._atomictype = atom.type
        self._atomicshape = atom.shape

        # Compute the optimal chunkshape, if needed
        if self._v_chunkshape is None:
            self._v_chunkshape = self._calc_chunkshape(
                self._v_expectedsizeinMB)
        self.nrows = SizeType(0)  # No rows at creation time

        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(atom.type, sys.byteorder)

        # After creating the vlarray, ``self._v_objectID`` needs to be
        # set because it is needed for setting attributes afterwards.
        self._v_objectID = self._createArray(self._v_new_title)

        # Add an attribute in case we have a pseudo-atom so that we
        # can retrieve the proper class after a re-opening operation.
        if is_pseudoatom:
            self.attrs.PSEUDOATOM = atom.kind

        return self._v_objectID
コード例 #11
0
    def __init__(self,
                 parentNode,
                 name,
                 atom=None,
                 title="",
                 filters=None,
                 expectedsizeinMB=1.0,
                 chunkshape=None,
                 byteorder=None,
                 _log=True):
        """Initialise the instance state and validate `chunkshape`.

        The node is considered new when an `atom` is given.  For a new
        node, a `chunkshape` other than ``None`` must be an integer or a
        sequence of length 1.  `TypeError` is raised when it is neither an
        integer nor a sequence, and `ValueError` when its length is not 1.
        The remaining arguments are forwarded to the parent class
        initialiser.
        """
        # The node is new exactly when an atom has been passed in.
        new = atom is not None

        # Private attributes.
        self._v_version = None          # object version of this array
        self._v_new = new               # first time the node is created?
        self._v_new_title = title       # new title for this node
        self._v_new_filters = filters   # new filter properties
        self._v_expectedsizeinMB = expectedsizeinMB  # expected size (MiB)
        self._v_chunkshape = None       # storage for Leaf's `chunkshape`

        # Miscellaneous iteration state.
        # Starting row, stopping row and step size of the iteration.
        self._start = self._stop = self._step = None
        # Number of rows read up to the current state of iteration.
        self._nrowsread = None
        # Starting and stopping rows for the current buffer.
        self._startb = self._stopb = None
        # Current row in iterators (sentinel).
        self._row = None
        # Whether we are in the middle of an iteration or not (sentinel).
        self._init = False
        # Current buffer in iterators.
        self.listarr = None

        # Documented (*public*) attributes.
        # An Atom (see :ref:`AtomClassDescr`) instance representing the
        # *type* and *shape* of the atomic objects to be saved.  A
        # *pseudo-atom* may be used for storing a serialized object or a
        # variable length string per row.
        self.atom = atom
        # On iterators, this is the index of the current row.
        self.nrow = None
        # The current number of rows in the array.
        self.nrows = None
        # The index of the enlargeable dimension (always 0 for vlarrays,
        # which only have one dimension currently).
        self.extdim = 0

        # Check the chunkshape parameter
        if new and chunkshape is not None:
            if isinstance(chunkshape, (int, numpy.integer, long)):
                # A bare integer N stands for (N,)
                normalized = (chunkshape, )
            else:
                try:
                    normalized = tuple(chunkshape)
                except TypeError:
                    raise TypeError(
                        "`chunkshape` parameter must be an integer or "
                        "sequence and you passed a %s" % type(chunkshape))
            if len(normalized) != 1:
                raise ValueError("`chunkshape` rank (length) must be 1: %r" %
                                 (normalized, ))
            self._v_chunkshape = tuple(SizeType(s) for s in normalized)

        super(VLArray, self).__init__(parentNode, name, new, filters,
                                      byteorder, _log)
コード例 #12
0
    def __init__(self,
                 parentnode,
                 name,
                 atom=None,
                 shape=None,
                 title="",
                 filters=None,
                 chunkshape=None,
                 byteorder=None,
                 _log=True):
        """Initialise the instance state and validate creation arguments.

        The node is considered new when an `atom` is given.  For a new
        node, `atom` must be an `Atom` instance and `shape` must be a
        sequence.  If `chunkshape` is supplied it must be a sequence with
        the same rank as `shape` and no dimension smaller than 1.
        `ValueError` or `TypeError` is raised when a check fails.  The
        remaining arguments are forwarded to the parent class initialiser.
        """
        # The node is new exactly when an atom has been passed in.
        new = atom is not None

        # An `Atom` instance representing the shape, type of the atomic
        # objects to be saved.
        self.atom = atom
        # The shape of the stored array.
        self.shape = None
        # The index of the enlargeable dimension (`CArray` objects are not
        # enlargeable by default).
        self.extdim = -1

        # Other private attributes.
        self._v_version = None           # object version of this array
        self._v_new = new                # first time the node is created?
        self._v_new_title = title        # new title for this node
        self._v_convert = True           # must the ``Array`` be converted?
        self._v_chunkshape = chunkshape  # storage for `chunkshape` property

        # Miscellaneous iteration state.
        # Starting row, stopping row and step size of the iteration.
        self._start = self._stop = self._step = None
        # Number of rows read up to the current state of iteration.
        self._nrowsread = None
        # Starting and stopping rows for the current buffer.
        self._startb = self._stopb = None
        # Current row in iterators (sentinel).
        self._row = None
        # Whether we are in the middle of an iteration or not (sentinel).
        self._init = False
        # Current buffer in iterators.
        self.listarr = None

        if new:
            if not isinstance(atom, Atom):
                raise ValueError("atom parameter should be an instance of "
                                 "tables.Atom and you passed a %s." %
                                 type(atom))
            if shape is None:
                raise ValueError("you must specify a non-empty shape")
            try:
                dims = tuple(shape)
            except TypeError:
                raise TypeError("`shape` parameter must be a sequence "
                                "and you passed a %s" % type(shape))
            self.shape = tuple(SizeType(d) for d in dims)

            if chunkshape is not None:
                try:
                    chunk_dims = tuple(chunkshape)
                except TypeError:
                    raise TypeError(
                        "`chunkshape` parameter must be a sequence "
                        "and you passed a %s" % type(chunkshape))
                if len(dims) != len(chunk_dims):
                    raise ValueError("the shape (%s) and chunkshape (%s) "
                                     "ranks must be equal." %
                                     (dims, chunk_dims))
                if min(chunk_dims) < 1:
                    raise ValueError("chunkshape parameter cannot have "
                                     "zero-dimensions.")
                self._v_chunkshape = tuple(SizeType(c) for c in chunk_dims)

        # The `Array` class is not abstract enough! :(
        super(Array, self).__init__(parentnode, name, new, filters, byteorder,
                                    _log)
コード例 #13
0
ファイル: array.py プロジェクト: kaukrise/PyTables
 def _getnrows(self):
     """Return the length of the main dimension (1 for a scalar shape)."""
     # An empty shape is the scalar case and counts as a single row.
     return SizeType(1) if self.shape == () else self.shape[self.maindim]
コード例 #14
0
 def _g_open(self):
     """Open the on-disk object and cache its basic metadata.

     The shape and byteorder returned by ``_openUnImplemented()`` are
     stored on the instance, ``nrows`` is taken from the first dimension
     of the shape and the object identifier is returned.
     """
     (self.shape, self.byteorder, objectID) = \
                  self._openUnImplemented()
     try:
         self.nrows = SizeType(self.shape[0])
     except IndexError:
         # An empty shape has no first dimension; report zero rows
         # instead of letting the IndexError abort the open.
         self.nrows = SizeType(0)
     return objectID
コード例 #15
0
    def __init__(self,
                 parentNode,
                 name,
                 atom=None,
                 shape=None,
                 title="",
                 filters=None,
                 chunkshape=None,
                 byteorder=None,
                 _log=True):
        """
        Create a `CArray` instance.

        `atom`
            An `Atom` instance representing the *type* and *shape* of
            the atomic objects to be saved.

        `shape`
            The shape of the new array.

        `title`
            A description for this node (it sets the ``TITLE`` HDF5
            attribute on disk).

        `filters`
            An instance of the `Filters` class that provides
            information about the desired I/O filters to be applied
            during the life of this object.

        `chunkshape`
            The shape of the data chunk to be read or written in a
            single HDF5 I/O operation.  Filters are applied to those
            chunks of data.  The dimensionality of `chunkshape` must
            be the same as that of `shape`.  If ``None``, a sensible
            value is calculated (which is recommended).

        `byteorder`
            The byteorder of the data *on disk*, specified as 'little'
            or 'big'.  If this is not specified, the byteorder is that
            of the platform.

        Raises `ValueError` if `atom` is given but is not an `Atom`
        instance, if `shape` is ``None`` for a new node, or if
        `chunkshape` has a different rank than `shape` or a dimension
        smaller than 1.  Raises `TypeError` if `shape` or `chunkshape` is
        not a sequence.
        """

        self.atom = atom
        """
        An `Atom` instance representing the shape, type of the atomic
        objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.extdim = -1  # `CArray` objects are not enlargeable by default
        """The index of the enlargeable dimension."""

        # Other private attributes
        self._v_version = None
        """The object version of this array."""
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""
        self._v_chunkshape = chunkshape
        """Private storage for the `chunkshape` property of the leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer. """
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        # The creation arguments are only validated for new nodes, i.e.
        # when an atom has been passed in.
        if new:
            if not isinstance(atom, Atom):
                raise ValueError(
                    "atom parameter should be an instance of tables.Atom "
                    "and you passed a %s." % type(atom))
            if shape is None:
                raise ValueError("you must specify a non-empty shape")
            try:
                shape = tuple(shape)
            except TypeError:
                raise TypeError("`shape` parameter must be a sequence "
                                "and you passed a %s" % type(shape))
            self.shape = tuple(SizeType(s) for s in shape)

            if chunkshape is not None:
                try:
                    chunkshape = tuple(chunkshape)
                except TypeError:
                    raise TypeError(
                        "`chunkshape` parameter must be a sequence "
                        "and you passed a %s" % type(chunkshape))
                if len(shape) != len(chunkshape):
                    raise ValueError(
                        "the shape (%s) and chunkshape (%s) ranks must be "
                        "equal." % (shape, chunkshape))
                elif min(chunkshape) < 1:
                    raise ValueError(
                        " chunkshape parameter cannot have zero-dimensions.")
                self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        # The `Array` class is not abstract enough! :(
        super(Array, self).__init__(parentNode, name, new, filters, byteorder,
                                    _log)
コード例 #16
0
ファイル: vlarray.py プロジェクト: dattatele/PyTables
    def __init__(self,
                 parentNode,
                 name,
                 atom=None,
                 title="",
                 filters=None,
                 expectedsizeinMB=1.0,
                 chunkshape=None,
                 byteorder=None,
                 _log=True):
        """
        Build a `VLArray` instance.

        `atom`
            `Atom` instance describing the *type* and *shape* of the
            atomic objects to be saved.  The node is treated as new when
            an atom is given.

        `title`
            Description for this node (sets the ``TITLE`` HDF5 attribute
            on disk).

        `filters`
            `Filters` instance with the I/O filters to be applied during
            the life of this object.

        `expectedsizeinMB`
            User estimate of the final size (in MB) of the `VLArray`;
            defaults to 1 MB.  Providing a guess for much smaller or much
            bigger arrays optimizes the HDF5 B-Tree creation and
            management process time and the amount of memory used.

        `chunkshape`
            Shape of the data chunk read or written in a single HDF5 I/O
            operation (filters are applied to those chunks).  It must be
            an integer or a sequence of length 1.  If ``None``, a sensible
            value is calculated (which is recommended).

        `byteorder`
            Byteorder of the data *on disk*, either 'little' or 'big'.
            When not specified, the byteorder of the platform is used.
        """
        # The node is new exactly when an atom has been passed in.
        new = atom is not None

        # Private attributes.
        self._v_version = None          # object version of this array
        self._v_new = new               # first time the node is created?
        self._v_new_title = title       # new title for this node
        self._v_new_filters = filters   # new filter properties
        self._v_expectedsizeinMB = expectedsizeinMB  # expected size (MiB)
        self._v_chunkshape = None       # storage for Leaf's `chunkshape`

        # Miscellaneous iteration state.
        # Starting row, stopping row and step size of the iteration.
        self._start = self._stop = self._step = None
        # Number of rows read up to the current state of iteration.
        self._nrowsread = None
        # Starting and stopping rows for the current buffer.
        self._startb = self._stopb = None
        # Current row in iterators (sentinel).
        self._row = None
        # Whether we are in the middle of an iteration or not (sentinel).
        self._init = False
        # Current buffer in iterators.
        self.listarr = None

        # Documented (*public*) attributes.
        # An `Atom` instance representing the shape and type of the
        # atomic objects to be saved.
        self.atom = atom
        # On iterators, this is the index of the current row.
        self.nrow = None
        # The total number of rows.
        self.nrows = None
        # The index of the enlargeable dimension (always 0 for vlarrays,
        # which only have one dimension currently).
        self.extdim = 0

        # Check the chunkshape parameter
        if new and chunkshape is not None:
            if isinstance(chunkshape, (int, numpy.integer, long)):
                # A bare integer N stands for (N,)
                normalized = (chunkshape, )
            else:
                try:
                    normalized = tuple(chunkshape)
                except TypeError:
                    raise TypeError(
                        "`chunkshape` parameter must be an integer or "
                        "sequence and you passed a %s" % type(chunkshape))
            if len(normalized) != 1:
                raise ValueError("`chunkshape` rank (length) must be 1: %r" %
                                 (normalized, ))
            self._v_chunkshape = tuple(SizeType(s) for s in normalized)

        super(VLArray, self).__init__(parentNode, name, new, filters,
                                      byteorder, _log)