예제 #1
0
파일: evaluate.py 프로젝트: 87/PyTables
def _compute(result, function, arguments,
             start=None, stop=None, step=None):
    """Compute `function` over `arguments` and put the outcome in `result`.

    The work is done buffer by buffer along the main dimension of the first
    argument: every argument is read with the same slicing key, `function`
    is called with the chunks read, and its return value is assigned to the
    corresponding slice of `result`.

    Parameters
    ----------
    result
        Destination container; it must support slice assignment.
    function
        Callable invoked as ``function(*chunks)``, one chunk per argument.
    arguments
        Sequence of input containers.  The first one drives the iteration
        (shape, range and buffer size).
    start, stop, step
        Range to process along the main dimension.  They are only taken into
        account when the first argument has a ``maindim`` attribute (they
        are handed to its ``_processRangeRead()``); otherwise the whole
        first argument is processed in one single buffer.

    Returns
    -------
    The very same `result` object that was passed in.
    """
    arg0 = arguments[0]
    has_maindim = hasattr(arg0, 'maindim')
    if has_maindim:
        maindim = arg0.maindim
        (start, stop, step) = arg0._processRangeRead(start, stop, step)
        nrowsinbuf = arg0.nrowsinbuf
    else:
        maindim = 0
        (start, stop, step) = (0, len(arg0), 1)
        nrowsinbuf = len(arg0)
    shape = list(arg0.shape)
    shape[maindim] = lrange(start, stop, step).length

    # The slices parameter for arg.__getitem__; only the entry for the main
    # dimension changes from one buffer to the next.
    slices = [slice(0, dim, 1) for dim in arg0.shape]
    # Leading part of the destination key: select everything in the
    # dimensions that come before the main one.
    dest_prefix = [slice(None, None, None)] * maindim

    # This is a hack to prevent doing unnecessary conversions
    # when copying buffers
    if has_maindim:
        for arg in arguments:
            arg._v_convert = False

    try:
        # Start the computation itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            # Floor division: the offset in the destination must stay an
            # integer whatever the division semantics in effect are.
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # Compute the slice to be filled in destination
            sl = dest_prefix + [slice(start3, stop3, None)]
            # Get the values for computing the buffer
            values = [arg.__getitem__(tuple(slices)) for arg in arguments]
            result[tuple(sl)] = function(*values)
    finally:
        # Activate the conversion again (default), even if reading the
        # inputs or calling `function` raised an error.
        if has_maindim:
            for arg in arguments:
                arg._v_convert = True

    return result
예제 #2
0
def _compute(result, function, arguments, start=None, stop=None, step=None):
    """Compute the `function` over the `arguments` and put the outcome in `result`.

    Inputs are processed in buffers along the main dimension of
    ``arguments[0]``.  For each buffer, all the arguments are sliced with the
    same key, `function` receives the slices as positional arguments, and
    whatever it returns is stored in the matching slice of `result`.

    `start`, `stop` and `step` are passed to ``_processRangeRead()`` of the
    first argument when it exposes a ``maindim`` attribute.  When it does
    not, they are ignored and the complete first argument is processed as a
    single buffer along dimension 0.

    The `result` object itself is returned.
    """
    arg0 = arguments[0]
    leaf_like = hasattr(arg0, 'maindim')
    if leaf_like:
        maindim = arg0.maindim
        (start, stop, step) = arg0._processRangeRead(start, stop, step)
        nrowsinbuf = arg0.nrowsinbuf
    else:
        maindim = 0
        (start, stop, step) = (0, len(arg0), 1)
        nrowsinbuf = len(arg0)
    shape = list(arg0.shape)
    shape[maindim] = lrange(start, stop, step).length

    # The slices parameter for arg0.__getitem__
    slices = [slice(0, dim, 1) for dim in arg0.shape]

    # This is a hack to prevent doing unnecessary conversions
    # when copying buffers
    if leaf_like:
        for arg in arguments:
            arg._v_convert = False

    try:
        # Start the computation itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            # End of this buffer in the source, clipped to the range end
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            # Position of this buffer in the destination.  `//` guarantees
            # an integer offset regardless of the division semantics.
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # Compute the slice to be filled in destination: everything in
            # the dimensions before `maindim`, then the row range.
            sl = [slice(None, None, None) for _ in range(maindim)]
            sl.append(slice(start3, stop3, None))
            # Get the values for computing the buffer
            values = [arg.__getitem__(tuple(slices)) for arg in arguments]
            result[tuple(sl)] = function(*values)
    finally:
        # Activate the conversion again (default).  Done in a `finally`
        # clause so that a failure does not leave the inputs with the
        # conversion disabled.
        if leaf_like:
            for arg in arguments:
                arg._v_convert = True

    return result
    def _g_copyWithStats(self, group, name, start, stop, step,
                         title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Creates a new VLArray called `name` in `group` and appends to it the
        rows of this one selected by `start`, `stop` and `step`.  Returns a
        ``(new_vlarray, nbytes)`` tuple, where `nbytes` is the number of
        objects appended multiplied by the size of the atom.
        """

        # The destination node comes first
        new_vlarray = VLArray(
            group, name, self.atom,
            title=title,
            filters=filters,
            expectedsizeinMB=self._v_expectedsizeinMB,
            chunkshape=chunkshape,
            _log=_log)

        # The length of each record cannot be foreseen, so no buffering is
        # attempted: rows are copied one at a time.
        rows_per_read = 1
        (start, stop, step) = self._processRangeRead(start, stop, step)

        # Pseudo-atoms have no `size` of their own; use the one of their base
        atom = self.atom
        atomsize = atom.size if hasattr(atom, 'size') else atom.base.size

        # Optimized copy (no conversions, no type and shape checks, etc...)
        copied_rows = SizeType(0)
        copied_bytes = 0
        for first in lrange(start, stop, step * rows_per_read):
            last = first + step * rows_per_read
            if last > stop:
                last = stop
            row = self._readArray(start=first, stop=last, step=step)[0]
            nobjects = row.shape[0]
            new_vlarray._append(row, nobjects)
            copied_bytes += nobjects * atomsize
            copied_rows += 1
        new_vlarray.nrows = copied_rows
        return (new_vlarray, copied_bytes)
예제 #4
0
    def __iter__(self):
        """Iterate over the rows of the outcome of the expression.

        This iterator always returns rows as NumPy objects, so a
        possible `out` container specified in `Expr.setOutput()` method
        is ignored here.

        See the `Expr.eval()` documentation for details on how the
        computation is carried out.  Also, for some examples of use see
        the `Expr.__init__()` docstrings.

        While the iteration is in progress, the ``_v_convert`` attribute of
        every input having a ``maindim`` attribute is set to False.  It is
        set back to True when the iteration finishes, and also when it is
        abandoned early or terminated by an error.
        """

        values, shape, maindim = self.values, self.shape, self.maindim

        # Get different info we need for the main computation loop
        (i_nrows, slice_pos, start, stop, step, nrowsinbuf) = \
            self._get_info(shape, maindim, itermode=True)

        if i_nrows == 0:
            # No elements to compute
            return

        # Create a key that selects every element in inputs
        # (including the main dimension)
        i_slices = [slice(None)] * (maindim + 1)

        # This is a hack to prevent doing unnecessary flavor conversions
        # while reading buffers
        for val in values:
            if hasattr(val, 'maindim'):
                val._v_convert = False

        try:
            # Start the computation itself
            for start2 in lrange(start, stop, step * nrowsinbuf):
                stop2 = start2 + step * nrowsinbuf
                if stop2 > stop:
                    stop2 = stop
                # Set the proper slice in the main dimension
                i_slices[maindim] = slice(start2, stop2, step)
                # Get the values for computing the buffer
                vals = []
                for i, val in enumerate(values):
                    if i in slice_pos:
                        vals.append(val.__getitem__(tuple(i_slices)))
                    else:
                        # A read of values is not apparently needed, as
                        # PyTables leaves seems to work just fine inside
                        # Numexpr
                        vals.append(val)
                # Do the actual computation
                rout = self._compiled_expr(*vals)
                # Return one row per call
                for row in rout:
                    yield row
        finally:
            # Activate the conversion again (default).  A generator may be
            # closed before being exhausted (e.g. a `break` in the consumer
            # loop), so the restoration has to live in a `finally` clause.
            for val in values:
                if hasattr(val, 'maindim'):
                    val._v_convert = True
예제 #5
0
    def __iter__(self):
        """Iterate over the rows of the outcome of the expression.

        This iterator always returns rows as NumPy objects, so a
        possible `out` container specified in `Expr.setOutput()` method
        is ignored here.

        See the `Expr.eval()` documentation for details on how the
        computation is carried out.  Also, for some examples of use see
        the `Expr.__init__()` docstrings.

        Inputs exposing a ``maindim`` attribute get their ``_v_convert``
        attribute set to False for the duration of the iteration.  The
        attribute is reset to True once the generator ends, whether it was
        exhausted, closed early, or interrupted by an exception.
        """

        values, shape, maindim = self.values, self.shape, self.maindim

        # Get different info we need for the main computation loop
        (i_nrows, slice_pos, start, stop, step, nrowsinbuf) = self._get_info(shape, maindim, itermode=True)

        if i_nrows == 0:
            # No elements to compute
            return

        # Create a key that selects every element in inputs
        # (including the main dimension)
        i_slices = [slice(None)] * (maindim + 1)

        # This is a hack to prevent doing unnecessary flavor conversions
        # while reading buffers
        for val in values:
            if hasattr(val, "maindim"):
                val._v_convert = False

        try:
            # Start the computation itself
            for start2 in lrange(start, stop, step * nrowsinbuf):
                stop2 = start2 + step * nrowsinbuf
                if stop2 > stop:
                    stop2 = stop
                # Set the proper slice in the main dimension
                i_slices[maindim] = slice(start2, stop2, step)
                # Get the values for computing the buffer.  Only the inputs
                # listed in `slice_pos` are sliced; the others are passed
                # untouched, as PyTables leaves seem to work just fine
                # inside Numexpr.
                key = tuple(i_slices)
                vals = [
                    val.__getitem__(key) if i in slice_pos else val
                    for i, val in enumerate(values)
                ]
                # Do the actual computation
                rout = self._compiled_expr(*vals)
                # Return one row per call
                for row in rout:
                    yield row
        finally:
            # Activate the conversion again (default).  Without `finally`
            # the flag would stay disabled whenever the consumer stops
            # iterating before the end or an error is raised.
            for val in values:
                if hasattr(val, "maindim"):
                    val._v_convert = True
예제 #6
0
    def _g_copyWithStats(self, group, name, start, stop, step, title, filters,
                         chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Creates a new CArray called `name` in `group`, with the shape of
        this array except for the main dimension, which is sized to the
        number of rows selected by `start`, `stop` and `step`.  The selected
        rows are then copied in buffers of ``self.nrowsinbuf`` rows.

        Extra keyword arguments are accepted but not used here.

        Returns a ``(new_carray, nbytes)`` tuple.  `nbytes` is computed from
        the complete shape of this (source) array and ``self.atom.size``.
        """
        (start, stop, step) = self._processRangeRead(start, stop, step)
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = lrange(start, stop, step).length
        # Now, fill the new carray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        try:
            # Build the new CArray object
            new_carray = CArray(group,
                                name,
                                atom=self.atom,
                                shape=shape,
                                title=title,
                                filters=filters,
                                chunkshape=chunkshape,
                                _log=_log)
            # Start the copy itself
            for start2 in lrange(start, stop, step * nrowsinbuf):
                # End of this buffer in the source, clipped to the range end
                stop2 = start2 + step * nrowsinbuf
                if stop2 > stop:
                    stop2 = stop
                # Set the proper slice in the main dimension
                slices[maindim] = slice(start2, stop2, step)
                # Row range in the destination.  `//` keeps the offset an
                # integer whatever the division semantics in effect are.
                start3 = (start2 - start) // step
                stop3 = start3 + nrowsinbuf
                if stop3 > shape[maindim]:
                    stop3 = shape[maindim]
                # The next line should be generalised if, in the future,
                # maindim is designed to be different from 0 in CArrays.
                # See ticket #199.
                new_carray[start3:stop3] = self.__getitem__(tuple(slices))
        finally:
            # Activate the conversion again (default), also when creating
            # the node or copying a buffer fails.
            self._v_convert = True
        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size

        return (new_carray, nbytes)
예제 #7
0
    def _g_copyWithStats(self, group, name, start, stop, step, title, filters,
                         chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Creates a new EArray called `name` in `group`, initially empty along
        the main dimension, and appends to it the rows of this array
        selected by `start`, `stop` and `step`, in buffers of
        ``self.nrowsinbuf`` rows.

        Extra keyword arguments are accepted but not used here.

        Returns a ``(new_earray, nbytes)`` tuple.  `nbytes` is computed from
        the complete shape of this (source) array and ``self.atom.itemsize``.
        """

        (start, stop, step) = self._processRangeRead(start, stop, step)
        # The destination starts with zero rows in the extensible dimension
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = 0
        # The number of final rows
        nrows = lrange(start, stop, step).length
        # Build the new EArray object
        new_earray = EArray(group,
                            name,
                            atom=self.atom,
                            shape=shape,
                            title=title,
                            filters=filters,
                            expectedrows=nrows,
                            chunkshape=chunkshape,
                            _log=_log)
        # Now, fill the new earray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        try:
            # Start the copy itself
            for start2 in lrange(start, stop, step * nrowsinbuf):
                # End of this buffer in the source, clipped to the range end
                stop2 = start2 + step * nrowsinbuf
                if stop2 > stop:
                    stop2 = stop
                # Set the proper slice in the extensible dimension
                slices[maindim] = slice(start2, stop2, step)
                new_earray._append(self.__getitem__(tuple(slices)))
        finally:
            # Activate the conversion again (default), also when reading or
            # appending a buffer fails.
            self._v_convert = True
        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.itemsize

        return (new_earray, nbytes)
예제 #8
0
    def _g_copyWithStats(self, group, name, start, stop, step, title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        A new EArray named `name` is created in `group` with no rows along
        the main dimension.  The rows of this array selected by `start`,
        `stop` and `step` are then appended to it, ``self.nrowsinbuf`` rows
        at a time.

        `kwargs` is accepted but not used by this method.

        The return value is the tuple ``(new_earray, nbytes)``, `nbytes`
        being the product of the complete source shape multiplied by
        ``self.atom.itemsize``.
        """
        (start, stop, step) = self._processRangeRead(start, stop, step)
        maindim = self.maindim
        # Same shape as the source, but empty in the extensible dimension
        shape = list(self.shape)
        shape[maindim] = 0
        # The number of final rows
        nrows = lrange(start, stop, step).length
        # Build the new EArray object
        new_earray = EArray(
            group,
            name,
            atom=self.atom,
            shape=shape,
            title=title,
            filters=filters,
            expectedrows=nrows,
            chunkshape=chunkshape,
            _log=_log,
        )
        # Now, fill the new earray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        try:
            # Start the copy itself
            for start2 in lrange(start, stop, step * nrowsinbuf):
                # Clip the end of the buffer to the end of the range
                stop2 = start2 + step * nrowsinbuf
                if stop2 > stop:
                    stop2 = stop
                # Set the proper slice in the extensible dimension
                slices[maindim] = slice(start2, stop2, step)
                new_earray._append(self.__getitem__(tuple(slices)))
        finally:
            # Activate the conversion again (default).  The `finally` clause
            # makes sure that a failed copy does not leave the source with
            # the conversion disabled.
            self._v_convert = True
        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.itemsize

        return (new_earray, nbytes)
예제 #9
0
    def _g_copyWithStats(self, group, name, start, stop, step,
                         title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        A new CArray named `name` is created in `group`.  Its shape is the
        one of this array, with the main dimension set to the number of rows
        selected by `start`, `stop` and `step`.  The selected rows are then
        copied, ``self.nrowsinbuf`` rows at a time.

        `kwargs` is accepted but not used by this method.

        The return value is the tuple ``(new_carray, nbytes)``, `nbytes`
        being the product of the complete source shape multiplied by
        ``self.atom.size``.
        """
        (start, stop, step) = self._processRangeRead(start, stop, step)
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = lrange(start, stop, step).length
        # Now, fill the new carray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        try:
            # Build the new CArray object
            new_carray = CArray(group, name, atom=self.atom, shape=shape,
                                title=title, filters=filters,
                                chunkshape=chunkshape, _log=_log)
            # Start the copy itself
            for start2 in lrange(start, stop, step*nrowsinbuf):
                # Clip the end of the buffer to the end of the range
                stop2 = start2 + step * nrowsinbuf
                if stop2 > stop:
                    stop2 = stop
                # Set the proper slice in the main dimension
                slices[maindim] = slice(start2, stop2, step)
                # Floor division, so that the destination offset is an
                # integer regardless of the division semantics in effect.
                start3 = (start2-start)//step
                stop3 = start3 + nrowsinbuf
                if stop3 > shape[maindim]:
                    stop3 = shape[maindim]
                # The next line should be generalised if, in the future,
                # maindim is designed to be different from 0 in CArrays.
                # See ticket #199.
                new_carray[start3:stop3] = self.__getitem__(tuple(slices))
        finally:
            # Activate the conversion again (default), whatever happened
            # while creating the node or copying the data.
            self._v_convert = True
        nbytes = numpy.prod(self.shape, dtype=SizeType)*self.atom.size

        return (new_carray, nbytes)
예제 #10
0
파일: array.py 프로젝트: vkarthi46/PyTables
    def _read(self, start, stop, step):
        """Read the array from disk without slice or flavor processing.

        A NumPy array is allocated with the shape of this array, its main
        dimension resized to the number of rows in the requested range, and
        ``_readArray()`` is asked to fill it.  The array is returned.
        """

        nrows = lrange(start, stop, step).length
        out_shape = list(self.shape)
        if len(out_shape) > 0:
            out_shape[self.maindim] = nrows
        buf = numpy.empty(shape=out_shape, dtype=self.atom.dtype)

        # Nothing to fetch when any of the dimensions is zero
        if 0 in out_shape:
            return buf

        self._readArray(start, stop, step, buf)
        return buf
예제 #11
0
파일: array.py 프로젝트: dattatele/PyTables
    def _read(self, start, stop, step):
        """Read the array from disk without slice or flavor processing.

        Returns a freshly allocated NumPy array holding the rows in the
        range given by `start`, `stop` and `step`.
        """

        selected = lrange(start, stop, step)
        dims = list(self.shape)
        # A scalar array has an empty shape, so there is no main dimension
        # to resize in that case.
        if dims:
            dims[self.maindim] = selected.length
        container = numpy.empty(dtype=self.atom.dtype, shape=dims)

        # Skip the actual read for empty arrays
        is_empty = 0 in dims
        if not is_empty:
            self._readArray(start, stop, step, container)
        return container
예제 #12
0
    def _interpret_indexing(self, keys):
        """Internal routine used by __getitem__ and __setitem__.

        Translates an indexing key into explicit ranges, one per dimension
        of the array.

        `keys` may be a single key or a tuple of keys; each key can be an
        index (as recognised by ``is_idx()``), a slice or an Ellipsis.
        Dimensions not covered by `keys` are selected completely.

        Returns a ``(startl, stopl, stepl, shape)`` tuple.  The first three
        items are NumPy arrays with one entry per dimension.  `shape` is a
        list with the length of the selection in each dimension, where the
        dimensions selected by means of a single index are left out.

        Raises IndexError if there are more keys than dimensions or if an
        index is not smaller than the length of its dimension, and
        TypeError if a key is not an index, a slice or an Ellipsis.
        """

        maxlen = len(self.shape)
        shape = (maxlen,)
        startl = numpy.empty(shape=shape, dtype=SizeType)
        stopl = numpy.empty(shape=shape, dtype=SizeType)
        stepl = numpy.empty(shape=shape, dtype=SizeType)
        # Flags the dimensions that were selected with a single index
        stop_None = numpy.zeros(shape=shape, dtype=SizeType)
        if not isinstance(keys, tuple):
            keys = (keys,)
        nkeys = len(keys)
        dim = 0
        # Here is some problem when dealing with [...,...] params
        # but this is a bit weird way to pass parameters anyway
        for key in keys:
            ellipsis = 0  # Sentinel
            # Ellipsis is a singleton, so an identity test is enough
            if key is Ellipsis:
                ellipsis = 1
                # Select completely as many dimensions as needed to leave
                # room for the keys that remain
                for diml in xrange(dim, len(self.shape) - (nkeys - dim) + 1):
                    startl[dim] = 0
                    stopl[dim] = self.shape[diml]
                    stepl[dim] = 1
                    dim += 1
            elif dim >= maxlen:
                raise IndexError(
                    "Too many indices for object '%s'" % self._v_pathname)
            elif is_idx(key):
                # Protection for index out of range
                if key >= self.shape[dim]:
                    raise IndexError("Index out of range")
                if key < 0:
                    # To support negative values (Fixes bug #968149)
                    key += self.shape[dim]
                start, stop, step = self._processRange(
                    key, key + 1, 1, dim=dim)
                stop_None[dim] = 1
            elif isinstance(key, slice):
                start, stop, step = self._processRange(
                    key.start, key.stop, key.step, dim=dim)
            else:
                # The key is wrapped in a 1-tuple so that a key being itself
                # a tuple does not break the string formatting
                raise TypeError("Non-valid index or slice: %s" % (key,))
            if not ellipsis:
                startl[dim] = start
                stopl[dim] = stop
                stepl[dim] = step
                dim += 1

        # Complete the other dimensions, if needed
        if dim < len(self.shape):
            for diml in xrange(dim, len(self.shape)):
                startl[dim] = 0
                stopl[dim] = self.shape[diml]
                stepl[dim] = 1
                dim += 1

        # Compute the shape for the container properly. Fixes #1288792
        shape = []
        for dim in xrange(len(self.shape)):
            # The negative division operates differently with python scalars
            # and numpy scalars (which are similar to C conventions). See:
            # http://www.python.org/doc/faq/programming.html#why-does-22-10-return-3
            # and
            # http://www.peterbe.com/Integer-division-in-programming-languages
            # for more info on this issue.
            # I've finally decided to rely on the len(xrange) function.
            # F. Alted 2006-09-25
            # Switch to `lrange` to allow long ranges (see #99).
            new_dim = lrange(startl[dim], stopl[dim], stepl[dim]).length
            if not (new_dim == 1 and stop_None[dim]):
                # Append dimension
                shape.append(new_dim)

        return startl, stopl, stepl, shape
예제 #13
0
    def eval(self):
        """Evaluate the expression and return the outcome.

        Because of performance reasons, the computation order tries to go along
        the common main dimension of all inputs.  If no such common main
        dimension is found, the iteration will go along the leading dimension
        instead.

        For non-consistent shapes in inputs (i.e. shapes having a different
        number of dimensions), the regular NumPy broadcast rules apply.
        There is one exception to this rule though: when the dimensions
        orthogonal to the main dimension of the expression are consistent, but
        the main dimension itself differs among the inputs, then the shortest
        one is chosen for doing the computations.  This is so because trying to
        expand very large on-disk arrays could be too expensive or simply not
        possible.

        Also, the regular Numexpr casting rules (which are similar to those of
        NumPy, although you should check the Numexpr manual for the exceptions)
        are applied to determine the output type.

        Finally, if the setOutput() method specifying a user container has
        already been called, the output is sent to this user-provided
        container.  If not, a fresh NumPy container is returned instead.

        While the computation is running, the ``_v_convert`` attribute of
        every input having a ``maindim`` attribute is set to False; it is
        set back to True before returning, also when an error is raised.

        .. warning::

            When dealing with large on-disk inputs, failing to specify an
            on-disk container may consume all your available memory.
        """

        values, shape, maindim = self.values, self.shape, self.maindim

        # Get different info we need for the main computation loop
        (i_nrows, slice_pos, start, stop, step, nrowsinbuf,
         out, o_maindim, o_start, o_stop, o_step) = \
            self._get_info(shape, maindim)

        if i_nrows == 0:
            # No elements to compute
            return self._single_row_out

        # Create a key that selects every element in inputs and output
        # (including the main dimension)
        i_slices = [slice(None)] * (maindim + 1)
        o_slices = [slice(None)] * (o_maindim + 1)

        # This is a hack to prevent doing unnecessary flavor conversions
        # while reading buffers
        for val in values:
            if hasattr(val, 'maindim'):
                val._v_convert = False

        try:
            # Start the computation itself
            for start2 in lrange(start, stop, step * nrowsinbuf):
                stop2 = start2 + step * nrowsinbuf
                if stop2 > stop:
                    stop2 = stop
                # Set the proper slice for inputs
                i_slices[maindim] = slice(start2, stop2, step)
                # Get the input values
                vals = []
                for i, val in enumerate(values):
                    if i in slice_pos:
                        vals.append(val.__getitem__(tuple(i_slices)))
                    else:
                        # A read of values is not apparently needed, as
                        # PyTables leaves seems to work just fine inside
                        # Numexpr
                        vals.append(val)
                # Do the actual computation for this slice
                rout = self._compiled_expr(*vals)
                # Set the values into the out buffer
                if self.append_mode:
                    out.append(rout)
                else:
                    # Compute the slice to be filled in output.  Floor
                    # division keeps the offset an integer whatever the
                    # division semantics in effect are.
                    start3 = o_start + (start2 - start) // step
                    stop3 = start3 + nrowsinbuf * o_step
                    if stop3 > o_stop:
                        stop3 = o_stop
                    o_slices[o_maindim] = slice(start3, stop3, o_step)
                    # Set the slice
                    out[tuple(o_slices)] = rout
        finally:
            # Activate the conversion again (default), also if reading,
            # computing or writing a buffer failed.
            for val in values:
                if hasattr(val, 'maindim'):
                    val._v_convert = True

        return out
예제 #14
0
    def _get_info(self, shape, maindim, itermode=False):
        """Return various info needed for evaluating the computation loop.

        Parameters
        ----------
        shape : list
            Shape of the outcome of the expression.  It is modified in place:
            the entry for the main dimension is shrunk to the number of rows
            in the input range, and may be shrunk again so as to fit in a
            user-provided output container.
        maindim : int or None
            The main dimension of the expression.  With None, an empty range
            (``start, stop, step = 0, 0, None``) and zero input rows are
            reported.
        itermode : bool
            If true, nothing related to the output container is computed
            nor returned.

        Returns
        -------
        tuple
            ``(i_nrows, slice_pos, start, stop, step, nrowsinbuf)`` when
            `itermode` is true.  Otherwise the same items followed by
            ``(out, o_maindim, o_start, o_stop, o_step)``.

        Raises
        ------
        ValueError
            If the shape of the user-provided output container, leaving
            its main dimension aside, does not match the one of the
            expression.
        """

        # Compute the shape of the resulting container having
        # in account new possible values of start, stop and step in
        # the inputs range
        if maindim is not None:
            (start, stop, step) = getIndices(self.start, self.stop, self.step,
                                             shape[maindim])
            shape[maindim] = min(shape[maindim],
                                 lrange(start, stop, step).length)
            i_nrows = shape[maindim]
        else:
            start, stop, step = 0, 0, None
            i_nrows = 0

        if not itermode:
            # Create a container for output if not defined yet
            o_maindim = 0  # Default maindim
            if self.out is None:
                # No user container: allocate a NumPy array with the dtype
                # of the single row outcome
                out = np.empty(shape, dtype=self._single_row_out.dtype)
                # Get the trivial values for start, stop and step
                if maindim is not None:
                    (o_start, o_stop, o_step) = (0, shape[maindim], 1)
                else:
                    (o_start, o_stop, o_step) = (0, 0, 1)
            else:
                out = self.out
                # Out container already provided.  Do some sanity checks.
                if hasattr(out, "maindim"):
                    o_maindim = out.maindim

                # Refine the shape of the resulting container having in
                # account new possible values of start, stop and step in
                # the output range
                o_shape = list(out.shape)
                (o_start, o_stop,
                 o_step) = getIndices(self.o_start, self.o_stop, self.o_step,
                                      o_shape[o_maindim])
                o_shape[o_maindim] = min(
                    o_shape[o_maindim],
                    lrange(o_start, o_stop, o_step).length)

                # Check that the shape of output is consistent with inputs
                tr_oshape = list(o_shape)  # this implies a copy
                olen_ = tr_oshape.pop(o_maindim)
                tr_shape = list(shape)  # do a copy
                if maindim is not None:
                    # NOTE(review): the input shape is popped at `o_maindim`,
                    # not at `maindim` -- confirm this is intended when both
                    # main dimensions differ.
                    len_ = tr_shape.pop(o_maindim)
                else:
                    len_ = 1
                if tr_oshape != tr_shape:
                    raise ValueError(
                        "Shape for out container does not match expression")
                # Force the input length to fit in `out`
                if not self.append_mode and olen_ < len_:
                    shape[o_maindim] = olen_
                    stop = start + olen_

        # Get the positions of inputs that should be sliced (the others
        # will be broadcasted)
        ndim = len(shape)
        slice_pos = [
            i for i, val in enumerate(self.values) if len(val.shape) == ndim
        ]

        # The size of the I/O buffer: the largest value given by
        # `_calc_nrowsinbuf()` for the sliced inputs, and never less than 1
        nrowsinbuf = 1
        for i, val in enumerate(self.values):
            # Skip scalar values in variables
            if i in slice_pos:
                nrows = self._calc_nrowsinbuf(val)
                if nrows > nrowsinbuf:
                    nrowsinbuf = nrows

        if not itermode:
            return (i_nrows, slice_pos, start, stop, step, nrowsinbuf, out,
                    o_maindim, o_start, o_stop, o_step)
        else:
            # For itermode, we don't need the out info
            return (i_nrows, slice_pos, start, stop, step, nrowsinbuf)
예제 #15
0
    def eval(self):
        """Evaluate the expression and return the outcome.

        Because of performance reasons, the computation order tries to go along
        the common main dimension of all inputs.  If no such common main
        dimension is found, the iteration will go along the leading dimension
        instead.

        For non-consistent shapes in inputs (i.e. shapes having a different
        number of dimensions), the regular NumPy broadcast rules apply.
        There is one exception to this rule though: when the dimensions
        orthogonal to the main dimension of the expression are consistent, but
        the main dimension itself differs among the inputs, then the shortest
        one is chosen for doing the computations.  This is so because trying to
        expand very large on-disk arrays could be too expensive or simply not
        possible.

        Also, the regular Numexpr casting rules (which are similar to those of
        NumPy, although you should check the Numexpr manual for the exceptions)
        are applied to determine the output type.

        Finally, if the setOutput() method specifying a user container has
        already been called, the output is sent to this user-provided
        container.  If not, a fresh NumPy container is returned instead.

        Inputs exposing a ``maindim`` attribute get their ``_v_convert``
        attribute set to False during the computation.  The attribute is
        reset to True before this method returns or propagates an error.

        .. warning::

            When dealing with large on-disk inputs, failing to specify an
            on-disk container may consume all your available memory.
        """

        values, shape, maindim = self.values, self.shape, self.maindim

        # Get different info we need for the main computation loop
        (i_nrows, slice_pos, start, stop, step, nrowsinbuf,
         out, o_maindim, o_start, o_stop, o_step) = \
            self._get_info(shape, maindim)

        if i_nrows == 0:
            # No elements to compute
            return self._single_row_out

        # Create a key that selects every element in inputs and output
        # (including the main dimension)
        i_slices = [slice(None)]*(maindim+1)
        o_slices = [slice(None)]*(o_maindim+1)

        # This is a hack to prevent doing unnecessary flavor conversions
        # while reading buffers
        for val in values:
            if hasattr(val, 'maindim'):
                val._v_convert = False

        try:
            # Start the computation itself
            for start2 in lrange(start, stop, step*nrowsinbuf):
                stop2 = start2 + step * nrowsinbuf
                if stop2 > stop:
                    stop2 = stop
                # Set the proper slice for inputs
                i_slices[maindim] = slice(start2, stop2, step)
                # Get the input values.  Only the inputs listed in
                # `slice_pos` are sliced; the others are passed untouched,
                # as PyTables leaves seem to work just fine inside Numexpr.
                key = tuple(i_slices)
                vals = [
                    val.__getitem__(key) if i in slice_pos else val
                    for i, val in enumerate(values)
                ]
                # Do the actual computation for this slice
                rout = self._compiled_expr(*vals)
                # Set the values into the out buffer
                if self.append_mode:
                    out.append(rout)
                else:
                    # Compute the slice to be filled in output.  `//` is
                    # used so that the offset is always an integer.
                    start3 = o_start + (start2-start)//step
                    stop3 = start3 + nrowsinbuf*o_step
                    if stop3 > o_stop:
                        stop3 = o_stop
                    o_slices[o_maindim] = slice(start3, stop3, o_step)
                    # Set the slice
                    out[tuple(o_slices)] = rout
        finally:
            # Activate the conversion again (default).  The `finally` clause
            # prevents a failed evaluation from leaving the inputs with the
            # conversion disabled.
            for val in values:
                if hasattr(val, 'maindim'):
                    val._v_convert = True

        return out
예제 #16
0
    def _get_info(self, shape, maindim, itermode=False):
        """Return various info needed for evaluating the computation loop."""

        # Compute the shape of the resulting container having
        # in account new possible values of start, stop and step in
        # the inputs range
        if maindim is not None:
            (start, stop, step) = getIndices(
                self.start, self.stop, self.step, shape[maindim])
            shape[maindim] = min(
                shape[maindim], lrange(start, stop, step).length)
            i_nrows = shape[maindim]
        else:
            start, stop, step = 0, 0, None
            i_nrows = 0

        if not itermode:
            # Create a container for output if not defined yet
            o_maindim = 0    # Default maindim
            if self.out is None:
                out = np.empty(shape, dtype=self._single_row_out.dtype)
                # Get the trivial values for start, stop and step
                if maindim is not None:
                    (o_start, o_stop, o_step) = (0, shape[maindim], 1)
                else:
                    (o_start, o_stop, o_step) = (0, 0, 1)
            else:
                out = self.out
                # Out container already provided.  Do some sanity checks.
                if hasattr(out, "maindim"):
                    o_maindim = out.maindim

                # Refine the shape of the resulting container having in
                # account new possible values of start, stop and step in
                # the output range
                o_shape = list(out.shape)
                (o_start, o_stop, o_step) = getIndices(
                    self.o_start, self.o_stop, self.o_step, o_shape[o_maindim])
                o_shape[o_maindim] = min(o_shape[o_maindim],
                                         lrange(o_start, o_stop, o_step).length)

                # Check that the shape of output is consistent with inputs
                tr_oshape = list(o_shape)   # this implies a copy
                olen_ = tr_oshape.pop(o_maindim)
                tr_shape = list(shape)      # do a copy
                if maindim is not None:
                    len_ = tr_shape.pop(o_maindim)
                else:
                    len_ = 1
                if tr_oshape != tr_shape:
                    raise ValueError(
                        "Shape for out container does not match expression")
                # Force the input length to fit in `out`
                if not self.append_mode and olen_ < len_:
                    shape[o_maindim] = olen_
                    stop = start + olen_

        # Get the positions of inputs that should be sliced (the others
        # will be broadcasted)
        ndim = len(shape)
        slice_pos = [i for i, val in enumerate(self.values)
                     if len(val.shape) == ndim]

        # The size of the I/O buffer
        nrowsinbuf = 1
        for i, val in enumerate(self.values):
            # Skip scalar values in variables
            if i in slice_pos:
                nrows = self._calc_nrowsinbuf(val)
                if nrows > nrowsinbuf:
                    nrowsinbuf = nrows

        if not itermode:
            return (i_nrows, slice_pos, start, stop, step, nrowsinbuf,
                    out, o_maindim, o_start, o_stop, o_step)
        else:
            # For itermode, we don't need the out info
            return (i_nrows, slice_pos, start, stop, step, nrowsinbuf)
예제 #17
0
파일: array.py 프로젝트: dattatele/PyTables
    def _interpret_indexing(self, keys):
        """Translate an indexing key into per-dimension ranges.

        Internal routine used by __getitem__ and __setitem__.

        `keys` may be a single key or a tuple of keys, where each key is
        an integer index, a slice or an Ellipsis.  Dimensions that are not
        covered by `keys` are selected completely.

        Returns the tuple ``(startl, stopl, stepl, shape)``.  The first
        three are NumPy arrays (of `SizeType` dtype) with one entry per
        dimension of ``self.shape``.  `shape` is a list with the number of
        elements selected in every dimension, skipping the dimensions
        that were addressed with an integer index.

        An IndexError is raised when there are more keys than dimensions
        or when an integer index is out of range (either on the positive
        or on the negative side).  A TypeError is raised for keys of any
        other kind.
        """

        maxlen = len(self.shape)
        startl = numpy.empty(shape=(maxlen,), dtype=SizeType)
        stopl = numpy.empty(shape=(maxlen,), dtype=SizeType)
        stepl = numpy.empty(shape=(maxlen,), dtype=SizeType)
        # Flags the dimensions addressed with an integer index
        stop_None = numpy.zeros(shape=(maxlen,), dtype=SizeType)
        if not isinstance(keys, tuple):
            keys = (keys,)
        nkeys = len(keys)
        dim = 0
        # Here is some problem when dealing with [...,...] params
        # but this is a bit weird way to pass parameters anyway
        for key in keys:
            if key is Ellipsis:
                # Select completely as many dimensions as needed so as to
                # leave room for the keys that are still pending
                for diml in xrange(dim, maxlen - (nkeys - dim) + 1):
                    startl[dim] = 0
                    stopl[dim] = self.shape[diml]
                    stepl[dim] = 1
                    dim += 1
                continue

            if dim >= maxlen:
                raise IndexError(
                    "Too many indices for object '%s'" % self._v_pathname)
            elif is_idx(key):
                nrows = self.shape[dim]
                # Protection for index out of range (on both sides, so
                # that a too negative index is not silently accepted)
                if key >= nrows or key < -nrows:
                    raise IndexError("Index out of range")
                if key < 0:
                    # To support negative values (Fixes bug #968149)
                    key += nrows
                start, stop, step = self._processRange(
                    key, key + 1, 1, dim=dim)
                stop_None[dim] = 1
            elif isinstance(key, slice):
                start, stop, step = self._processRange(
                    key.start, key.stop, key.step, dim=dim)
            else:
                # Wrap the key in a tuple so that tuple keys are formatted
                # as a whole instead of being taken as format arguments
                raise TypeError(
                    "Non-valid index or slice: %s" % (key,))
            startl[dim] = start
            stopl[dim] = stop
            stepl[dim] = step
            dim += 1

        # Complete the other dimensions, if needed
        for diml in xrange(dim, maxlen):
            startl[diml] = 0
            stopl[diml] = self.shape[diml]
            stepl[diml] = 1

        # Compute the shape for the container properly. Fixes #1288792
        shape = []
        for dim in xrange(maxlen):
            # The negative division operates differently with python scalars
            # and numpy scalars (which are similar to C conventions). See:
            # http://www.python.org/doc/faq/programming.html#why-does-22-10-return-3
            # and
            # http://www.peterbe.com/Integer-division-in-programming-languages
            # for more info on this issue.
            # So the length is not computed by hand; `lrange` is used for
            # this, which also allows for long ranges (see #99).
            new_dim = lrange(startl[dim], stopl[dim], stepl[dim]).length
            # Dimensions addressed with an integer index are dropped
            if not (new_dim == 1 and stop_None[dim]):
                shape.append(new_dim)

        return startl, stopl, stepl, shape