def _compute(result, function, arguments,
             start=None, stop=None, step=None):
    """Compute the `function` over the `arguments` and put the outcome in `result`.

    The first element of `arguments` drives the iteration.  When it has a
    ``maindim`` attribute, the range and buffer size are taken from it
    (``_processRangeRead()`` and ``nrowsinbuf``); otherwise the whole
    object is processed in a single buffer along dimension 0 and the
    `start`, `stop` and `step` parameters are ignored.

    Every argument is read with the same slice, `function` is called with
    the resulting buffers and its outcome is stored in the matching slice
    of `result`, which is also returned.
    """
    arg0 = arguments[0]
    is_leaf = hasattr(arg0, 'maindim')
    if is_leaf:
        maindim = arg0.maindim
        (start, stop, step) = arg0._processRangeRead(start, stop, step)
        nrowsinbuf = arg0.nrowsinbuf
    else:
        maindim = 0
        (start, stop, step) = (0, len(arg0), 1)
        nrowsinbuf = len(arg0)
    shape = list(arg0.shape)
    shape[maindim] = lrange(start, stop, step).length

    # The slices parameter for arg0.__getitem__
    slices = [slice(0, dim, 1) for dim in arg0.shape]

    # This is a hack to prevent doing unnecessary conversions
    # when copying buffers
    if is_leaf:
        for arg in arguments:
            arg._v_convert = False

    try:
        # Start the computation itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            # Floor division keeps the destination bounds integral
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # Compute the slice to be filled in destination: everything
            # in the dimensions before `maindim`, the buffer range in it
            sl = [slice(None, None, None)] * maindim
            sl.append(slice(start3, stop3, None))
            # Get the values for computing the buffer
            values = [arg.__getitem__(tuple(slices)) for arg in arguments]
            result[tuple(sl)] = function(*values)
    finally:
        # Activate the conversion again (default), even if a read or
        # the computation failed
        if is_leaf:
            for arg in arguments:
                arg._v_convert = True

    return result
def _compute(result, function, arguments,
             start=None, stop=None, step=None):
    """Compute the `function` over the `arguments` and put the outcome in `result`.

    `arguments[0]` decides how the iteration is done.  If it exposes a
    ``maindim`` attribute, its ``_processRangeRead()`` and ``nrowsinbuf``
    provide the range and the buffer size; if not, dimension 0 is used,
    the complete object is handled as one buffer and `start`, `stop` and
    `step` are not taken into account.

    All the arguments are sliced identically for each buffer.  The value
    returned by `function` is written in the corresponding slice of
    `result`, and `result` is returned.
    """
    arg0 = arguments[0]
    from_leaf = hasattr(arg0, 'maindim')
    if from_leaf:
        maindim = arg0.maindim
        (start, stop, step) = arg0._processRangeRead(start, stop, step)
        nrowsinbuf = arg0.nrowsinbuf
    else:
        maindim = 0
        (start, stop, step) = (0, len(arg0), 1)
        nrowsinbuf = len(arg0)
    shape = list(arg0.shape)
    shape[maindim] = lrange(start, stop, step).length

    # The slices parameter for arg0.__getitem__
    slices = [slice(0, dim, 1) for dim in arg0.shape]

    # This is a hack to prevent doing unnecessary conversions
    # when copying buffers
    if from_leaf:
        for arg in arguments:
            arg._v_convert = False

    try:
        # Start the computation itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            # `//` so that the bounds below are always integers
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # Destination slice: full range before `maindim`, then the
            # range covered by this buffer
            sl = [slice(None, None, None)] * maindim
            sl.append(slice(start3, stop3, None))
            # Get the values for computing the buffer
            values = [arg.__getitem__(tuple(slices)) for arg in arguments]
            result[tuple(sl)] = function(*values)
    finally:
        # Activate the conversion again (default); done here so that an
        # error inside the loop cannot leave it disabled
        if from_leaf:
            for arg in arguments:
                arg._v_convert = True

    return result
def _g_copyWithStats(self, group, name, start, stop, step,
                     title, filters, chunkshape, _log, **kwargs):
    """Private part of Leaf.copy() for each kind of leaf.

    Creates a new VLArray called `name` in `group` and appends to it the
    rows of this one selected by `start`, `stop` and `step`.  Returns a
    ``(new_vlarray, nbytes)`` tuple, where `nbytes` is the number of
    objects appended multiplied by the size of the atom.
    """
    # Build the new VLArray object
    new_vlarray = VLArray(
        group, name, self.atom, title=title, filters=filters,
        expectedsizeinMB=self._v_expectedsizeinMB,
        chunkshape=chunkshape, _log=_log)

    # The copy is not buffered because the length of each record cannot
    # be foreseen, so the rows are copied one by one.  Some analysis
    # could be done in the future in order to buffer the process.
    rows_per_read = 1
    (start, stop, step) = self._processRangeRead(start, stop, step)

    # Optimized version (no conversions, no type and shape checks, etc...)
    copied_rows = SizeType(0)
    copied_bytes = 0
    if hasattr(self.atom, 'size'):
        atomsize = self.atom.size
    else:
        # it is a pseudo-atom
        atomsize = self.atom.base.size

    stride = step * rows_per_read
    for first in lrange(start, stop, stride):
        # Never read past the end of the requested range
        last = min(first + stride, stop)
        row = self._readArray(start=first, stop=last, step=step)[0]
        nobjects = row.shape[0]
        # Save the record on disk
        new_vlarray._append(row, nobjects)
        copied_bytes += nobjects * atomsize
        copied_rows += 1

    new_vlarray.nrows = copied_rows
    return (new_vlarray, copied_bytes)
def __iter__(self):
    """Iterate over the rows of the outcome of the expression.

    This iterator always returns rows as NumPy objects, so a possible
    `out` container specified in `Expr.setOutput()` method is ignored
    here.

    See the `Expr.eval()` documentation for details on how the
    computation is carried out.  Also, for some examples of use see the
    `Expr.__init__()` docstrings.
    """
    values, shape, maindim = self.values, self.shape, self.maindim

    # Get different info we need for the main computation loop
    (i_nrows, slice_pos, start, stop, step, nrowsinbuf) = \
        self._get_info(shape, maindim, itermode=True)

    if i_nrows == 0:
        # No elements to compute
        return

    # Create a key that selects every element in inputs
    # (including the main dimension)
    i_slices = [slice(None)] * (maindim + 1)

    # This is a hack to prevent doing unnecessary flavor conversions
    # while reading buffers
    leaves = [val for val in values if hasattr(val, 'maindim')]
    for leaf in leaves:
        leaf._v_convert = False

    try:
        # Start the computation itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            i_slices[maindim] = slice(start2, stop2, step)
            key = tuple(i_slices)
            # Get the values for computing the buffer
            vals = []
            for i, val in enumerate(values):
                if i in slice_pos:
                    vals.append(val.__getitem__(key))
                else:
                    # A read of values is not apparently needed, as
                    # PyTables leaves seems to work just fine inside
                    # Numexpr
                    vals.append(val)
            # Do the actual computation
            rout = self._compiled_expr(*vals)
            # Return one row per call
            for row in rout:
                yield row
    finally:
        # Activate the conversion again (default).  Being in a `finally`
        # clause, this also runs when the generator is closed before
        # being exhausted or when an exception is raised.
        for leaf in leaves:
            leaf._v_convert = True
def __iter__(self):
    """Iterate over the rows of the outcome of the expression.

    This iterator always returns rows as NumPy objects, so a possible
    `out` container specified in `Expr.setOutput()` method is ignored
    here.

    See the `Expr.eval()` documentation for details on how the
    computation is carried out.  Also, for some examples of use see the
    `Expr.__init__()` docstrings.
    """
    values, shape, maindim = self.values, self.shape, self.maindim

    # Get different info we need for the main computation loop
    (i_nrows, slice_pos, start, stop, step, nrowsinbuf) = self._get_info(
        shape, maindim, itermode=True
    )

    if i_nrows == 0:
        # No elements to compute
        return

    # Create a key that selects every element in inputs
    # (including the main dimension)
    i_slices = [slice(None)] * (maindim + 1)

    # This is a hack to prevent doing unnecessary flavor conversions
    # while reading buffers
    converting = [val for val in values if hasattr(val, "maindim")]
    for val in converting:
        val._v_convert = False

    try:
        # Start the computation itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            i_slices[maindim] = slice(start2, stop2, step)
            key = tuple(i_slices)
            # Get the values for computing the buffer
            vals = []
            for i, val in enumerate(values):
                if i in slice_pos:
                    vals.append(val.__getitem__(key))
                else:
                    # A read of values is not apparently needed, as
                    # PyTables leaves seems to work just fine inside
                    # Numexpr
                    vals.append(val)
            # Do the actual computation
            rout = self._compiled_expr(*vals)
            # Return one row per call
            for row in rout:
                yield row
    finally:
        # Activate the conversion again (default).  The `finally` clause
        # guarantees it when the consumer abandons the iteration early
        # (generator closed) or when an error is raised.
        for val in converting:
            val._v_convert = True
def _g_copyWithStats(self, group, name, start, stop, step,
                     title, filters, chunkshape, _log, **kwargs):
    """Private part of Leaf.copy() for each kind of leaf.

    Creates a new CArray called `name` in `group` whose main dimension
    has as many rows as the range given by `start`, `stop` and `step`,
    and fills it buffer by buffer with the data read from this array.

    Returns a ``(new_carray, nbytes)`` tuple.  `nbytes` is computed from
    the shape of the source array and the size of its atom.
    """
    (start, stop, step) = self._processRangeRead(start, stop, step)
    maindim = self.maindim
    shape = list(self.shape)
    shape[maindim] = lrange(start, stop, step).length
    # Now, fill the new carray with values from source
    nrowsinbuf = self.nrowsinbuf
    # The slices parameter for self.__getitem__
    slices = [slice(0, dim, 1) for dim in self.shape]

    # This is a hack to prevent doing unnecessary conversions
    # when copying buffers
    self._v_convert = False
    try:
        # Build the new CArray object
        new_carray = CArray(group, name, atom=self.atom, shape=shape,
                            title=title, filters=filters,
                            chunkshape=chunkshape, _log=_log)
        # Start the copy itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            # Floor division keeps the destination bounds integral
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # The next line should be generalised if, in the future,
            # maindim is designed to be different from 0 in CArrays.
            # See ticket #199.
            new_carray[start3:stop3] = self.__getitem__(tuple(slices))
    finally:
        # Activate the conversion again (default), also on failure
        self._v_convert = True

    nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size
    return (new_carray, nbytes)
def _g_copyWithStats(self, group, name, start, stop, step,
                     title, filters, chunkshape, _log, **kwargs):
    """Private part of Leaf.copy() for each kind of leaf.

    Creates a new EArray called `name` in `group`, empty along the main
    dimension, and appends to it, buffer by buffer, the rows of this
    array selected by `start`, `stop` and `step`.

    Returns a ``(new_earray, nbytes)`` tuple.  `nbytes` is computed from
    the shape of the source array and the item size of its atom.
    """
    (start, stop, step) = self._processRangeRead(start, stop, step)
    maindim = self.maindim
    shape = list(self.shape)
    # The destination starts with no rows in the extensible dimension
    shape[maindim] = 0
    # The number of final rows
    nrows = lrange(start, stop, step).length
    # Build the new EArray object
    new_earray = EArray(group, name, atom=self.atom, shape=shape,
                        title=title, filters=filters, expectedrows=nrows,
                        chunkshape=chunkshape, _log=_log)
    # Now, fill the new earray with values from source
    nrowsinbuf = self.nrowsinbuf
    # The slices parameter for self.__getitem__
    slices = [slice(0, dim, 1) for dim in self.shape]

    # This is a hack to prevent doing unnecessary conversions
    # when copying buffers
    self._v_convert = False
    try:
        # Start the copy itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the extensible dimension
            slices[maindim] = slice(start2, stop2, step)
            # Save the records on disk
            new_earray._append(self.__getitem__(tuple(slices)))
    finally:
        # Activate the conversion again (default), also on failure
        self._v_convert = True

    nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.itemsize
    return (new_earray, nbytes)
def _g_copyWithStats(self, group, name, start, stop, step,
                     title, filters, chunkshape, _log, **kwargs):
    """Private part of Leaf.copy() for each kind of leaf.

    A new EArray named `name` is created in `group` with zero rows in
    the main dimension.  The rows of this array selected by `start`,
    `stop` and `step` are then read in buffers and appended to it.

    The return value is a ``(new_earray, nbytes)`` tuple, `nbytes` being
    derived from the shape of the source array and the item size of its
    atom.
    """
    (start, stop, step) = self._processRangeRead(start, stop, step)
    maindim = self.maindim
    shape = list(self.shape)
    # No rows yet in the extensible dimension of the destination
    shape[maindim] = 0
    # The number of final rows
    nrows = lrange(start, stop, step).length
    # Build the new EArray object
    new_earray = EArray(
        group,
        name,
        atom=self.atom,
        shape=shape,
        title=title,
        filters=filters,
        expectedrows=nrows,
        chunkshape=chunkshape,
        _log=_log,
    )
    # Now, fill the new earray with values from source
    nrowsinbuf = self.nrowsinbuf
    # The slices parameter for self.__getitem__
    slices = [slice(0, dim, 1) for dim in self.shape]

    # This is a hack to prevent doing unnecessary conversions
    # when copying buffers
    self._v_convert = False
    try:
        # Start the copy itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the extensible dimension
            slices[maindim] = slice(start2, stop2, step)
            # Save the records on disk
            new_earray._append(self.__getitem__(tuple(slices)))
    finally:
        # Activate the conversion again (default); the `finally` clause
        # covers the case of an error while reading or appending
        self._v_convert = True

    nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.itemsize
    return (new_earray, nbytes)
def _g_copyWithStats(self, group, name, start, stop, step,
                     title, filters, chunkshape, _log, **kwargs):
    """Private part of Leaf.copy() for each kind of leaf.

    A new CArray named `name` is created in `group`; its main dimension
    gets the number of rows in the range defined by `start`, `stop` and
    `step`.  The data is then copied from this array in buffers.

    The return value is a ``(new_carray, nbytes)`` tuple, `nbytes` being
    derived from the shape of the source array and the size of its atom.
    """
    (start, stop, step) = self._processRangeRead(start, stop, step)
    maindim = self.maindim
    shape = list(self.shape)
    shape[maindim] = lrange(start, stop, step).length
    # Now, fill the new carray with values from source
    nrowsinbuf = self.nrowsinbuf
    # The slices parameter for self.__getitem__
    slices = [slice(0, dim, 1) for dim in self.shape]

    # This is a hack to prevent doing unnecessary conversions
    # when copying buffers
    self._v_convert = False
    try:
        # Build the new CArray object
        new_carray = CArray(group, name, atom=self.atom, shape=shape,
                            title=title, filters=filters,
                            chunkshape=chunkshape, _log=_log)
        # Start the copy itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            # `//` so that the destination bounds are always integers
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # The next line should be generalised if, in the future,
            # maindim is designed to be different from 0 in CArrays.
            # See ticket #199.
            new_carray[start3:stop3] = self.__getitem__(tuple(slices))
    finally:
        # Activate the conversion again (default); the `finally` clause
        # covers the case of an error while creating or filling the copy
        self._v_convert = True

    nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size
    return (new_carray, nbytes)
def _read(self, start, stop, step):
    """Read the array from disk without slice or flavor processing.

    Returns a new NumPy array with the dtype of the atom.  Its shape is
    the one of this array, with the main dimension replaced by the
    number of rows in the ``(start, stop, step)`` range.
    """
    nrows = lrange(start, stop, step).length
    out_shape = list(self.shape)
    if len(out_shape) > 0:
        # Only arrays with at least one dimension have a main dimension
        out_shape[self.maindim] = nrows
    buf = numpy.empty(dtype=self.atom.dtype, shape=out_shape)
    if 0 in out_shape:
        # Protection against reading empty arrays
        return buf
    self._readArray(start, stop, step, buf)
    return buf
def _interpret_indexing(self, keys):
    """Internal routine used by __getitem__ and __setitem__.

    `keys` is a single key or a tuple of keys; each key may be an index
    (as decided by ``is_idx()``), a slice or ``Ellipsis``.  Dimensions
    not covered by `keys` are selected completely.

    Returns ``(startl, stopl, stepl, shape)``: three arrays with one
    entry per dimension holding the range selected in that dimension,
    and a list with the lengths of the selected dimensions, where the
    dimensions addressed with a single index are left out.

    Raises IndexError when there are more keys than dimensions or when an
    index is not lower than the length of its dimension, and TypeError
    for keys of any other kind.
    """
    ndim = len(self.shape)
    startl = numpy.empty(shape=(ndim,), dtype=SizeType)
    stopl = numpy.empty(shape=(ndim,), dtype=SizeType)
    stepl = numpy.empty(shape=(ndim,), dtype=SizeType)
    # Flags the dimensions that were addressed with a single index
    stop_None = numpy.zeros(shape=(ndim,), dtype=SizeType)
    if not isinstance(keys, tuple):
        keys = (keys,)
    nkeys = len(keys)
    dim = 0
    # Here is some problem when dealing with [...,...] params
    # but this is a bit weird way to pass parameters anyway
    for key in keys:
        if key is Ellipsis:
            # Select completely as many dimensions as needed to leave
            # room for the keys that follow
            ellipsis_end = ndim - (nkeys - dim) + 1
            while dim < ellipsis_end:
                startl[dim] = 0
                stopl[dim] = self.shape[dim]
                stepl[dim] = 1
                dim += 1
            continue
        if dim >= ndim:
            raise IndexError("Too many indices for object '%s'" %
                             self._v_pathname)
        if is_idx(key):
            # Protection for index out of range
            if key >= self.shape[dim]:
                raise IndexError("Index out of range")
            if key < 0:
                # To support negative values (Fixes bug #968149)
                key += self.shape[dim]
            start, stop, step = self._processRange(
                key, key + 1, 1, dim=dim)
            stop_None[dim] = 1
        elif isinstance(key, slice):
            start, stop, step = self._processRange(
                key.start, key.stop, key.step, dim=dim)
        else:
            # The one-element tuple keeps the formatting right when the
            # offending key is itself a tuple
            raise TypeError("Non-valid index or slice: %s" % (key,))
        startl[dim] = start
        stopl[dim] = stop
        stepl[dim] = step
        dim += 1

    # Complete the other dimensions, if needed
    while dim < ndim:
        startl[dim] = 0
        stopl[dim] = self.shape[dim]
        stepl[dim] = 1
        dim += 1

    # Compute the shape for the container properly. Fixes #1288792
    shape = []
    for dim in range(ndim):
        # The negative division operates differently with python scalars
        # and numpy scalars (which are similar to C conventions), so the
        # length is taken from a range object instead of being computed.
        # `lrange` is used to allow long ranges (see #99).
        new_dim = lrange(startl[dim], stopl[dim], stepl[dim]).length
        if not (new_dim == 1 and stop_None[dim]):
            # Append dimension
            shape.append(new_dim)

    return startl, stopl, stepl, shape
def eval(self):
    """Evaluate the expression and return the outcome.

    Because of performance reasons, the computation order tries to go
    along the common main dimension of all inputs.  If not such a common
    main dimension is found, the iteration will go along the leading
    dimension instead.

    For non-consistent shapes in inputs (i.e. shapes having a different
    number of dimensions), the regular NumPy broadcast rules applies.
    There is one exception to this rule though: when the dimensions
    orthogonal to the main dimension of the expression are consistent,
    but the main dimension itself differs among the inputs, then the
    shortest one is chosen for doing the computations.  This is so
    because trying to expand very large on-disk arrays could be too
    expensive or simply not possible.

    Also, the regular Numexpr casting rules (which are similar to those
    of NumPy, although you should check the Numexpr manual for the
    exceptions) are applied to determine the output type.

    Finally, if the setOutput() method specifying a user container has
    already been called, the output is sent to this user-provided
    container.  If not, a fresh NumPy container is returned instead.

    .. warning:: When dealing with large on-disk inputs, failing to
       specify an on-disk container may consume all your available
       memory.
    """
    values, shape, maindim = self.values, self.shape, self.maindim

    # Get different info we need for the main computation loop
    (i_nrows, slice_pos, start, stop, step, nrowsinbuf,
     out, o_maindim, o_start, o_stop, o_step) = \
        self._get_info(shape, maindim)

    if i_nrows == 0:
        # No elements to compute
        return self._single_row_out

    # Create a key that selects every element in inputs and output
    # (including the main dimension)
    i_slices = [slice(None)] * (maindim + 1)
    o_slices = [slice(None)] * (o_maindim + 1)

    # This is a hack to prevent doing unnecessary flavor conversions
    # while reading buffers
    leaves = [val for val in values if hasattr(val, 'maindim')]
    for leaf in leaves:
        leaf._v_convert = False

    try:
        # Start the computation itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice for inputs
            i_slices[maindim] = slice(start2, stop2, step)
            key = tuple(i_slices)
            # Get the input values
            vals = []
            for i, val in enumerate(values):
                if i in slice_pos:
                    vals.append(val.__getitem__(key))
                else:
                    # A read of values is not apparently needed, as
                    # PyTables leaves seems to work just fine inside
                    # Numexpr
                    vals.append(val)
            # Do the actual computation for this slice
            rout = self._compiled_expr(*vals)
            # Set the values into the out buffer
            if self.append_mode:
                out.append(rout)
            else:
                # Compute the slice to be filled in output.  The number
                # of rows already computed is scaled by `o_step`, so
                # that consecutive buffers do not overlap in `out` when
                # the output step is greater than one.
                rows_done = (start2 - start) // step
                start3 = o_start + rows_done * o_step
                stop3 = start3 + nrowsinbuf * o_step
                if stop3 > o_stop:
                    stop3 = o_stop
                o_slices[o_maindim] = slice(start3, stop3, o_step)
                # Set the slice
                out[tuple(o_slices)] = rout
    finally:
        # Activate the conversion again (default), also on failure
        for leaf in leaves:
            leaf._v_convert = True

    return out
def _get_info(self, shape, maindim, itermode=False):
    """Return various info needed for evaluating the computation loop.

    `shape` is updated in place with the number of rows that will be
    computed along `maindim`.

    In `itermode` the result is the tuple ``(i_nrows, slice_pos, start,
    stop, step, nrowsinbuf)``.  Otherwise the output container and its
    range are added at the end: ``(..., out, o_maindim, o_start,
    o_stop, o_step)``.

    Raises ValueError when the shape of a user-provided output container
    does not match the one of the expression.
    """
    has_maindim = maindim is not None

    # Compute the shape of the resulting container having in account
    # new possible values of start, stop and step in the inputs range
    if has_maindim:
        (start, stop, step) = getIndices(
            self.start, self.stop, self.step, shape[maindim])
        shape[maindim] = min(
            shape[maindim], lrange(start, stop, step).length)
        i_nrows = shape[maindim]
    else:
        start, stop, step = 0, 0, None
        i_nrows = 0

    if not itermode:
        o_maindim = 0    # Default maindim
        if self.out is None:
            # Create a container for output, as it is not defined yet
            out = np.empty(shape, dtype=self._single_row_out.dtype)
            # Get the trivial values for start, stop and step
            o_start, o_step = 0, 1
            if has_maindim:
                o_stop = shape[maindim]
            else:
                o_stop = 0
        else:
            # Out container already provided.  Do some sanity checks.
            out = self.out
            if hasattr(out, "maindim"):
                o_maindim = out.maindim

            # Refine the shape of the resulting container having in
            # account new possible values of start, stop and step in
            # the output range
            o_shape = list(out.shape)
            (o_start, o_stop, o_step) = getIndices(
                self.o_start, self.o_stop, self.o_step,
                o_shape[o_maindim])
            o_shape[o_maindim] = min(
                o_shape[o_maindim],
                lrange(o_start, o_stop, o_step).length)

            # Check that the shape of output is consistent with inputs
            # (both lists below are copies)
            other_odims = list(o_shape)
            olen_ = other_odims.pop(o_maindim)
            other_idims = list(shape)
            if has_maindim:
                len_ = other_idims.pop(o_maindim)
            else:
                len_ = 1
            if other_odims != other_idims:
                raise ValueError(
                    "Shape for out container does not match expression")

            # Force the input length to fit in `out`
            if olen_ < len_ and not self.append_mode:
                shape[o_maindim] = olen_
                stop = start + olen_

    # Get the inputs that should be sliced (the others will be
    # broadcasted), together with their positions
    ndim = len(shape)
    sliced = [(pos, val) for pos, val in enumerate(self.values)
              if len(val.shape) == ndim]
    slice_pos = [pos for pos, _ in sliced]

    # The size of the I/O buffer: the largest among the sliced inputs
    nrowsinbuf = 1
    for _, val in sliced:
        nrowsinbuf = max(nrowsinbuf, self._calc_nrowsinbuf(val))

    if itermode:
        # For itermode, we don't need the out info
        return (i_nrows, slice_pos, start, stop, step, nrowsinbuf)
    return (i_nrows, slice_pos, start, stop, step, nrowsinbuf,
            out, o_maindim, o_start, o_stop, o_step)
def eval(self):
    """Evaluate the expression and return the outcome.

    Because of performance reasons, the computation order tries to go
    along the common main dimension of all inputs.  If not such a common
    main dimension is found, the iteration will go along the leading
    dimension instead.

    For non-consistent shapes in inputs (i.e. shapes having a different
    number of dimensions), the regular NumPy broadcast rules applies.
    There is one exception to this rule though: when the dimensions
    orthogonal to the main dimension of the expression are consistent,
    but the main dimension itself differs among the inputs, then the
    shortest one is chosen for doing the computations.  This is so
    because trying to expand very large on-disk arrays could be too
    expensive or simply not possible.

    Also, the regular Numexpr casting rules (which are similar to those
    of NumPy, although you should check the Numexpr manual for the
    exceptions) are applied to determine the output type.

    Finally, if the setOutput() method specifying a user container has
    already been called, the output is sent to this user-provided
    container.  If not, a fresh NumPy container is returned instead.

    .. warning:: When dealing with large on-disk inputs, failing to
       specify an on-disk container may consume all your available
       memory.
    """
    values, shape, maindim = self.values, self.shape, self.maindim

    # Get different info we need for the main computation loop
    (i_nrows, slice_pos, start, stop, step, nrowsinbuf,
     out, o_maindim, o_start, o_stop, o_step) = \
        self._get_info(shape, maindim)

    if i_nrows == 0:
        # No elements to compute
        return self._single_row_out

    # Create a key that selects every element in inputs and output
    # (including the main dimension)
    i_slices = [slice(None)] * (maindim + 1)
    o_slices = [slice(None)] * (o_maindim + 1)

    # This is a hack to prevent doing unnecessary flavor conversions
    # while reading buffers
    converting = [val for val in values if hasattr(val, 'maindim')]
    for val in converting:
        val._v_convert = False

    try:
        # Start the computation itself
        for start2 in lrange(start, stop, step * nrowsinbuf):
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice for inputs
            i_slices[maindim] = slice(start2, stop2, step)
            key = tuple(i_slices)
            # Get the input values
            vals = []
            for i, val in enumerate(values):
                if i in slice_pos:
                    vals.append(val.__getitem__(key))
                else:
                    # A read of values is not apparently needed, as
                    # PyTables leaves seems to work just fine inside
                    # Numexpr
                    vals.append(val)
            # Do the actual computation for this slice
            rout = self._compiled_expr(*vals)
            # Set the values into the out buffer
            if self.append_mode:
                out.append(rout)
            else:
                # Compute the slice to be filled in output.  The count
                # of rows computed so far has to be multiplied by
                # `o_step`; otherwise, with an output step greater than
                # one, each buffer would overwrite part of the previous
                # one in `out`.
                rows_done = (start2 - start) // step
                start3 = o_start + rows_done * o_step
                stop3 = start3 + nrowsinbuf * o_step
                if stop3 > o_stop:
                    stop3 = o_stop
                o_slices[o_maindim] = slice(start3, stop3, o_step)
                # Set the slice
                out[tuple(o_slices)] = rout
    finally:
        # Activate the conversion again (default); the `finally` clause
        # covers the case of an error during the computation
        for val in converting:
            val._v_convert = True

    return out
def _get_info(self, shape, maindim, itermode=False):
    """Return various info needed for evaluating the computation loop.

    The entry of `shape` for `maindim` is modified in place so that it
    holds the number of rows to be computed.

    The result is ``(i_nrows, slice_pos, start, stop, step,
    nrowsinbuf)`` when `itermode` is true.  When it is false, the tuple
    is extended with ``(out, o_maindim, o_start, o_stop, o_step)``,
    which describe the output container.

    A ValueError is raised if an output container was provided and its
    shape does not match the one of the expression.
    """
    with_maindim = maindim is not None

    # Compute the shape of the resulting container having in account
    # new possible values of start, stop and step in the inputs range
    if with_maindim:
        (start, stop, step) = getIndices(
            self.start, self.stop, self.step, shape[maindim])
        shape[maindim] = min(
            shape[maindim], lrange(start, stop, step).length)
        i_nrows = shape[maindim]
    else:
        start, stop, step = 0, 0, None
        i_nrows = 0

    if not itermode:
        o_maindim = 0    # Default maindim
        if self.out is None:
            # No output defined yet, so create a container for it
            out = np.empty(shape, dtype=self._single_row_out.dtype)
            # Get the trivial values for start, stop and step
            o_start, o_step = 0, 1
            if with_maindim:
                o_stop = shape[maindim]
            else:
                o_stop = 0
        else:
            # Out container already provided.  Do some sanity checks.
            out = self.out
            if hasattr(out, "maindim"):
                o_maindim = out.maindim

            # Refine the shape of the resulting container having in
            # account new possible values of start, stop and step in
            # the output range
            o_shape = list(out.shape)
            (o_start, o_stop, o_step) = getIndices(
                self.o_start, self.o_stop, self.o_step,
                o_shape[o_maindim])
            o_shape[o_maindim] = min(
                o_shape[o_maindim],
                lrange(o_start, o_stop, o_step).length)

            # Check that the shape of output is consistent with inputs;
            # the comparison is done on copies without the main dimension
            rest_out = list(o_shape)
            olen_ = rest_out.pop(o_maindim)
            rest_in = list(shape)
            if with_maindim:
                len_ = rest_in.pop(o_maindim)
            else:
                len_ = 1
            if rest_out != rest_in:
                raise ValueError(
                    "Shape for out container does not match expression")

            # Force the input length to fit in `out`
            if olen_ < len_ and not self.append_mode:
                shape[o_maindim] = olen_
                stop = start + olen_

    # Get the inputs that should be sliced and their positions (the
    # other inputs will be broadcasted)
    ndim = len(shape)
    sliced = [(pos, val) for pos, val in enumerate(self.values)
              if len(val.shape) == ndim]
    slice_pos = [pos for pos, _ in sliced]

    # The size of the I/O buffer is the largest one required by the
    # sliced inputs
    nrowsinbuf = 1
    for _, val in sliced:
        nrowsinbuf = max(nrowsinbuf, self._calc_nrowsinbuf(val))

    if itermode:
        # For itermode, we don't need the out info
        return (i_nrows, slice_pos, start, stop, step, nrowsinbuf)
    return (i_nrows, slice_pos, start, stop, step, nrowsinbuf,
            out, o_maindim, o_start, o_stop, o_step)
def _interpret_indexing(self, keys):
    """Internal routine used by __getitem__ and __setitem__.

    `keys` can be one key or a tuple of them.  A key is either an index
    (whatever ``is_idx()`` accepts), a slice or ``Ellipsis``.  The
    dimensions for which no key is given are selected completely.

    The result is ``(startl, stopl, stepl, shape)``.  The first three
    are arrays with the range selected in each dimension; `shape` is a
    list with the length of every selected dimension, not including the
    ones that were addressed with a single index.

    IndexError is raised if there are more keys than dimensions or if an
    index is not lower than the length of its dimension.  TypeError is
    raised for any other kind of key.
    """
    ndim = len(self.shape)
    startl = numpy.empty(shape=(ndim,), dtype=SizeType)
    stopl = numpy.empty(shape=(ndim,), dtype=SizeType)
    stepl = numpy.empty(shape=(ndim,), dtype=SizeType)
    # Marks the dimensions that were addressed with a single index
    stop_None = numpy.zeros(shape=(ndim,), dtype=SizeType)
    if not isinstance(keys, tuple):
        keys = (keys,)
    nkeys = len(keys)
    dim = 0
    # Here is some problem when dealing with [...,...] params
    # but this is a bit weird way to pass parameters anyway
    for key in keys:
        if key is Ellipsis:
            # Take whole dimensions until only the ones needed by the
            # remaining keys are left
            ellipsis_end = ndim - (nkeys - dim) + 1
            while dim < ellipsis_end:
                startl[dim] = 0
                stopl[dim] = self.shape[dim]
                stepl[dim] = 1
                dim += 1
            continue
        if dim >= ndim:
            raise IndexError("Too many indices for object '%s'" %
                             self._v_pathname)
        if is_idx(key):
            # Protection for index out of range
            if key >= self.shape[dim]:
                raise IndexError("Index out of range")
            if key < 0:
                # To support negative values (Fixes bug #968149)
                key += self.shape[dim]
            start, stop, step = self._processRange(
                key, key + 1, 1, dim=dim)
            stop_None[dim] = 1
        elif isinstance(key, slice):
            start, stop, step = self._processRange(
                key.start, key.stop, key.step, dim=dim)
        else:
            # `key` goes inside a tuple so that a key which is a tuple
            # itself does not break the formatting of the message
            raise TypeError("Non-valid index or slice: %s" % (key,))
        startl[dim] = start
        stopl[dim] = stop
        stepl[dim] = step
        dim += 1

    # Complete the other dimensions, if needed
    while dim < ndim:
        startl[dim] = 0
        stopl[dim] = self.shape[dim]
        stepl[dim] = 1
        dim += 1

    # Compute the shape for the container properly. Fixes #1288792
    shape = []
    for dim in range(ndim):
        # The negative division operates differently with python scalars
        # and numpy scalars (which are similar to C conventions), so the
        # length is obtained from a range object and not by computing it.
        # `lrange` is used to allow long ranges (see #99).
        new_dim = lrange(startl[dim], stopl[dim], stepl[dim]).length
        if not (new_dim == 1 and stop_None[dim]):
            shape.append(new_dim)

    return startl, stopl, stepl, shape