def _align(self, axes, key_shape=None):
    """
    Bring the iteration axes to the front and flatten them into one axis.

    Functional operators call this before iterating. The requested axes
    are checked against the array shape with ``inshape``, the underlying
    values are transposed so those axes lead, and the leading axes are
    then collapsed into a single linear "key" axis.

    Parameters
    ----------
    axes : tuple[int]
        One or more axes that a functional operator will iterate over.

    key_shape : sequence of int, optional, default=None
        Sizes to use for the key axes instead of the sizes read from
        ``self.shape``. Only the product of the entries is used.

    Returns
    -------
    The transposed and reshaped values: the first axis has length
    ``prod(key_shape)`` and is followed by the untouched axes in their
    original order.
    """
    # the requested axes must be valid for an array of this shape
    inshape(self.shape, axes)

    shape = self.shape

    # fall back to the sizes of the requested axes when no key shape is given
    if not key_shape:
        key_shape = [shape[ax] for ax in axes]

    # axes that are not iterated over keep their relative order
    others = []
    for dim in range(len(shape)):
        if dim not in axes:
            others.append(dim)
    other_sizes = [shape[dim] for dim in others]

    # keys first, everything else afterwards
    order = list(axes) + others
    target = [prod(key_shape)] + other_sizes

    # transpose so the keys lead, then linearize them into a single axis
    moved = self.values.transpose(*order)
    return moved.reshape(*target)
def _align(self, axis):
    """
    Align a spark bolt array so that the iteration axes are in the keys.

    Functional operators call this before iterating. The requested axes
    are checked against the array shape with ``inshape``; key and value
    axes are then exchanged through ``self.swap`` so that exactly the
    requested axes end up in the keys.

    Parameters
    ----------
    axis : tuple[int]
        One or more axes that will be iterated over by a functional
        operator.

    Returns
    -------
    BoltArraySpark
        ``self`` when no axis has to move, otherwise the result of
        ``self.swap``.
    """
    # the requested axes must be valid for an array of this shape
    inshape(self.shape, axis)

    split = self.split

    # requested axes that currently live in the values (index >= split),
    # expressed relative to the first value axis
    into_keys = []
    for ax in axis:
        if ax >= split:
            into_keys.append(ax - split)

    # key axes (index < split) that were not requested must leave the keys
    into_values = [key for key in range(split) if key not in axis]

    # already aligned: nothing to exchange
    if not (into_keys or into_values):
        return self

    return self.swap(into_values, into_keys)
def _align(self, axes, key_shape=None):
    """
    Align a local array so that the axes for iteration come first.

    Applied before most functional operators. After checking the axes
    against the array shape with ``inshape``, the underlying values are
    transposed so the requested axes lead and those leading axes are
    flattened into one linear key axis.

    Parameters
    ----------
    axes : tuple[int]
        One or more axes that will be iterated over by a functional
        operator.

    key_shape : sequence of int, optional, default=None
        Replacement sizes for the key axes. When omitted (or empty) the
        sizes are taken from ``self.shape``. Only the product is used.

    Returns
    -------
    The values transposed and reshaped to
    ``[prod(key_shape)] + <sizes of the remaining axes>``.
    """
    # the key axes must be valid for an ndarray of this shape
    inshape(self.shape, axes)

    ndim = len(self.shape)

    # dimensions that stay behind the keys, in their original order
    trailing = [d for d in range(ndim) if d not in axes]
    trailing_sizes = [self.shape[d] for d in trailing]

    # size of every key axis, unless the caller supplied one
    if key_shape:
        key_sizes = key_shape
    else:
        key_sizes = [self.shape[d] for d in axes]

    # permutation that puts the keys in front
    permutation = list(axes) + trailing

    # all key axes collapse into one leading axis
    flat_shape = [prod(key_sizes)] + trailing_sizes

    transposed = self.values.transpose(*permutation)
    return transposed.reshape(*flat_shape)
def reduce(self, func, axis=0):
    """
    Reduce the array along one or more axes.

    A numpy ``ufunc`` is reduced directly through ``func.reduce``. Any
    other function of two arguments is folded cumulatively with
    ``functools.reduce`` over the array after ``_align`` has moved the
    requested axes to the front.

    Parameters
    ----------
    func : function
        Function of two arrays that returns a single array.

    axis : tuple or int, optional, default=0
        Axis or multiple axes to reduce along.

    Returns
    -------
    The reduced values wrapped by ``self._constructor``.

    Raises
    ------
    ValueError
        If the shape of the result is not the original shape with the
        reduced axes removed.
    """
    axes = sorted(tupleize(axis))

    if not isinstance(func, ufunc):
        # generic callables are folded over the aligned (keys-first) array
        collapsed = reduce(func, self._align(axes))
    else:
        # a ufunc can reduce over several axes on its own
        inshape(self.shape, axes)
        collapsed = func.reduce(self, axis=tuple(axes))

    result = self._constructor(collapsed)

    # only the axes that were not reduced may survive
    kept = tuple(
        size for dim, size in enumerate(self.shape) if dim not in axes
    )
    if result.shape != kept:
        raise ValueError(
            "reduce did not yield a BoltArray with valid dimensions")

    return result
def _align(self, axes, key_shape=None):
    """
    Align the array so that the axes for iteration come first.

    The requested axes are checked against the array shape with
    ``inshape``. The array is then transposed so those axes lead, and
    the leading axes are flattened into one linear key axis.

    Parameters
    ----------
    axes : list[int] or tuple[int]
        One or more axes that will be iterated over by a functional
        operator. Both lists and tuples are accepted.

    key_shape : sequence of int, optional, default=None
        Sizes to use for the key axes instead of the sizes read from
        ``self.shape``. Only the product of the entries is used.

    Returns
    -------
    The array transposed and reshaped to
    ``[prod(key_shape)] + <sizes of the remaining axes>``.
    """
    # ensure that the key axes are valid for an ndarray of this shape
    inshape(self.shape, axes)

    # compute the set of dimensions/axes that will be used to reshape
    remaining = [dim for dim in range(len(self.shape)) if dim not in axes]
    key_shape = key_shape if key_shape else [self.shape[axis] for axis in axes]
    remaining_shape = [self.shape[axis] for axis in remaining]
    linearized_shape = [prod(key_shape)] + remaining_shape

    # compute the transpose permutation; list() lets a tuple of axes be
    # concatenated with the list of remaining axes (tuple + list raises
    # TypeError)
    transpose_order = list(axes) + remaining

    # transpose the array so that the keys being mapped over come first,
    # then linearize keys
    reshaped = self.transpose(*transpose_order).reshape(*linearized_shape)

    return reshaped
def reduce(self, func, axis=0):
    """
    Reduce an array along an axis.

    Cumulatively applies an associative/commutative function of two
    arguments to all arrays along the given axis (or axes). Unless
    ``func`` is a numpy ``ufunc``, the array is first aligned so that
    the requested axes are in the keys, which may require a
    transpose/reshape.

    Parameters
    ----------
    func : function
        Function of two arrays that returns a single array.

    axis : tuple or int, optional, default=0
        Axis or multiple axes to reduce along.

    Returns
    -------
    BoltArrayLocal

    Raises
    ------
    ValueError
        If the reduced array does not have the original shape with the
        reduced axes removed.
    """
    axes = sorted(tupleize(axis))
    via_ufunc = isinstance(func, ufunc)

    if via_ufunc:
        # ufuncs handle reduction over multiple axes by themselves
        inshape(self.shape, axes)
        outcome = func.reduce(self, axis=tuple(axes))
    else:
        # fold the function over the records of the aligned array
        aligned = self._align(axes)
        outcome = reduce(func, aligned)

    wrapped = self._constructor(outcome)

    # the surviving dimensions are exactly those that were not reduced
    surviving = []
    for position, extent in enumerate(self.shape):
        if position not in axes:
            surviving.append(extent)

    if wrapped.shape != tuple(surviving):
        raise ValueError(
            "reduce did not yield a BoltArray with valid dimensions")

    return wrapped
def _align(self, axes, key_shape=None):
    """
    Transpose and reshape the array so the iteration axes form the keys.

    After checking ``axes`` against the array shape with ``inshape``,
    the requested axes are moved to the front and collapsed into a
    single leading axis. The other axes follow in their original order.

    Parameters
    ----------
    axes : list[int] or tuple[int]
        One or more axes that will be iterated over by a functional
        operator. Both lists and tuples are accepted.

    key_shape : sequence of int, optional, default=None
        Replacement sizes for the key axes. When omitted (or empty) the
        sizes are read from ``self.shape``. Only the product is used.

    Returns
    -------
    The array with a leading axis of length ``prod(key_shape)`` followed
    by the remaining axes.
    """
    # ensure that the key axes are valid for an ndarray of this shape
    inshape(self.shape, axes)

    # compute the set of dimensions/axes that will be used to reshape
    remaining = [dim for dim in range(len(self.shape)) if dim not in axes]
    key_shape = key_shape if key_shape else [
        self.shape[axis] for axis in axes
    ]
    remaining_shape = [self.shape[axis] for axis in remaining]
    linearized_shape = [prod(key_shape)] + remaining_shape

    # compute the transpose permutation; axes may arrive as a tuple, and
    # tuple + list raises TypeError, so normalize to a list first
    transpose_order = list(axes) + remaining

    # transpose the array so that the keys being mapped over come first,
    # then linearize keys
    reshaped = self.transpose(*transpose_order).reshape(*linearized_shape)

    return reshaped
def reduce(self, func, axis=0):
    """
    Reduce an array along one or several axes.

    Applies an associative/commutative function of two arguments
    cumulatively to all arrays along the chosen axes. For functions
    that are not numpy ``ufunc`` objects the array is aligned first so
    that the chosen axes are in the keys, which may require a
    transpose/reshape.

    Parameters
    ----------
    func : function
        Function of two arrays that returns a single array.

    axis : tuple or int, optional, default=0
        Axis or multiple axes to reduce along.

    Returns
    -------
    BoltArrayLocal

    Raises
    ------
    ValueError
        If the result does not have the shape left over after removing
        the reduced axes.
    """
    axes = sorted(tupleize(axis))

    if isinstance(func, ufunc):
        # a ufunc reduces over multiple axes without any realignment
        inshape(self.shape, axes)
        axis_tuple = tuple(axes)
        values = func.reduce(self, axis=axis_tuple)
    else:
        values = reduce(func, self._align(axes))

    out = self._constructor(values)

    # validate the result against the shape with the reduced axes dropped
    untouched = [n for n in range(len(self.shape)) if n not in axes]
    wanted = tuple(self.shape[n] for n in untouched)
    if out.shape != wanted:
        raise ValueError("reduce did not yield a BoltArray with valid dimensions")

    return out
def reduce(self, func, axis=0):
    """
    Reduce the array along the given axis or axes.

    If ``func`` is a numpy ``ufunc`` its own ``reduce`` method is used.
    Otherwise the array is aligned with ``_align`` so the requested
    axes come first, and ``func`` is folded over it with
    ``functools.reduce``.

    Parameters
    ----------
    func : function
        Function of two arrays that returns a single array.

    axis : tuple or int, optional, default=0
        Axis or multiple axes to reduce along.

    Returns
    -------
    The reduced values wrapped by ``self._constructor``.

    Raises
    ------
    ValueError
        If the shape of the result differs from the original shape with
        the reduced axes removed.
    """
    axes = sorted(tupleize(axis))

    def _fold():
        # fold func over the records of the keys-first array
        return reduce(func, self._align(axes))

    def _ufunc_reduce():
        # ufuncs can reduce over multiple axes automatically
        inshape(self.shape, axes)
        return func.reduce(self, axis=tuple(axes))

    raw = _ufunc_reduce() if isinstance(func, ufunc) else _fold()
    reduced_array = self._constructor(raw)

    # the result may only keep the dimensions that were not reduced
    leftover = tuple(
        extent for index, extent in enumerate(self.shape)
        if index not in axes
    )
    if reduced_array.shape != leftover:
        raise ValueError("reduce did not yield a BoltArray with valid dimensions")

    return reduced_array