def first(self):
    """
    Return the first element of an array.

    When the array is not flagged as ordered, the underlying RDD is
    sorted by key before the first value is taken.

    Returns
    -------
    BoltArrayLocal
    """
    from bolt.local.array import BoltArrayLocal

    if self._ordered:
        records = self._rdd
    else:
        records = self._rdd.sortByKey()

    head = records.values().first()
    return BoltArrayLocal(head)
def reduce(self, func, axis=(0, )):
    """
    Reduce an array along an axis.

    Applies a function of two arguments cumulatively to all arrays
    along an axis.

    Parameters
    ----------
    func : function
        Function of two arrays that returns a single array

    axis : tuple or int, optional, default=(0,)
        Axis or multiple axes to reduce along.

    Returns
    -------
    BoltArrayLocal or scalar
        The reduced result wrapped as a local array; a non-ndarray
        result is returned as is, and an ndarray of shape (1,) is
        unwrapped to its single element.
    """
    from bolt.local.array import BoltArrayLocal
    from numpy import ndarray

    aligned = self._align(tupleize(axis))
    result = aligned._rdd.values().reduce(func)

    if isinstance(result, ndarray):
        if result.shape == (1, ):
            # single-valued ndarrays are handed back as plain scalars
            return result[0]
        return BoltArrayLocal(result)

    # the reduce function may itself produce a scalar
    return result
def _stat(self, axis=None, func=None, name=None):
    """
    Compute a statistic over an axis.

    Exactly one of ``func`` (used in a reduce) or ``name`` (used by a
    stat counter) must be given.

    Parameters
    ----------
    axis : tuple or int, optional, default=None
        Axis to compute statistic over, if None
        will compute over all axes

    func : function, optional, default=None
        Function for reduce, see BoltArraySpark.reduce

    name : str
        A named statistic, see StatCounter

    Raises
    ------
    ValueError
        If neither or both of ``func`` and ``name`` are provided.
    """
    if axis is None:
        axis = list(range(len(self.shape)))
    axis = tupleize(axis)

    # function-only request: delegate to reduce
    if func and not name:
        return self.reduce(func, axis)

    # anything other than a name-only request is invalid at this point
    if not name or func:
        raise ValueError(
            'Must specify either a function or a statistic name.')

    from bolt.local.array import BoltArrayLocal

    aligned = self._align(axis)

    def per_partition(records):
        # one counter per partition, tracking only the requested statistic
        return [StatCounter(values=records, stats=name)]

    def merge(left, right):
        return left.combine(right)

    merged = aligned._rdd.values().mapPartitions(per_partition).reduce(merge)
    wrapped = BoltArrayLocal(getattr(merged, name))
    return wrapped.toscalar()
def _stat(self, axis=None, func=None, name=None):
    """
    Compute a statistic over an axis.

    Either a reducing function or the name of a statistic may be
    supplied, but not both.

    Parameters
    ----------
    axis : tuple or int, optional, default=None
        Axis to compute statistic over, if None
        will compute over all axes

    func : function, optional, default=None
        Function for reduce, see BoltArraySpark.reduce

    name : str
        A named statistic, see StatCounter

    Raises
    ------
    ValueError
        If neither or both of ``func`` and ``name`` are provided.
    """
    if axis is None:
        axis = list(range(len(self.shape)))
    axis = tupleize(axis)

    if func and not name:
        return self.reduce(func, axis)
    elif name and not func:
        from bolt.local.array import BoltArrayLocal

        values = self._align(axis)._rdd.values()
        # build one StatCounter per partition, then fold them together
        counters = values.mapPartitions(
            lambda part: [StatCounter(values=part, stats=name)])
        total = counters.reduce(lambda a, b: a.combine(b))
        statistic = getattr(total, name)
        return BoltArrayLocal(statistic).toscalar()

    raise ValueError('Must specify either a function or a statistic name.')
def concatenate(arrays, axis=0):
    """
    Join a sequence of arrays together.

    Parameters
    ----------
    arrays : tuple
        A sequence of array-like e.g. (a1, a2, ...)

    axis : int, optional, default=0
        The axis along which the arrays will be joined.

    Returns
    -------
    BoltArrayLocal

    Raises
    ------
    ValueError
        If ``arrays`` is not a tuple.
    """
    if not isinstance(arrays, tuple):
        raise ValueError("data type not understood")

    converted = tuple(asarray(item) for item in arrays)

    from numpy import concatenate as np_concatenate
    joined = np_concatenate(converted, axis)
    return BoltArrayLocal(joined)
def reduce(self, func, axis=(0, ), keepdims=False):
    """
    Reduce an array along an axis.

    Applies a commutative/associative function of two arguments
    cumulatively to all arrays along an axis. Array will
    be aligned so that the desired set of axes are in the keys,
    which may incur a swap.

    Parameters
    ----------
    func : function
        Function of two arrays that returns a single array

    axis : tuple or int, optional, default=(0,)
        Axis or multiple axes to reduce along.

    keepdims : boolean, optional, default=False
        If True, a length-one dimension is re-inserted into the result
        at each reduced axis position.

    Returns
    -------
    BoltArrayLocal or scalar
        The reduced result wrapped as a local array; a non-ndarray
        result is returned as is, and an ndarray of shape (1,) is
        unwrapped to its single element (this also applies after
        keepdims expansion).
    """
    from bolt.local.array import BoltArrayLocal
    from numpy import ndarray

    axis = tupleize(axis)
    swapped = self._align(axis)
    arr = swapped._rdd.values().treeReduce(func, depth=3)

    if keepdims:
        # Insert singleton axes in ascending order: each insertion shifts
        # later positions, so an unsorted axis tuple such as (1, 0) would
        # otherwise leave the length-one dimensions at the wrong positions.
        for i in sorted(axis):
            arr = expand_dims(arr, axis=i)

    if not isinstance(arr, ndarray):
        # the result of a reduce can also be a scalar
        return arr
    elif arr.shape == (1, ):
        # ndarrays with single values in them should be converted into scalars
        return arr[0]

    return BoltArrayLocal(arr)
def array(a, dtype=None, order='C'):
    """
    Create a local bolt array.

    Parameters
    ----------
    a : array-like
        An array, any object exposing the array interface, an
        object whose __array__ method returns an array, or any
        (nested) sequence.

    dtype : data-type, optional, default=None
        The desired data-type for the array. If None, will
        be determined from the data. (see numpy)

    order : {'C', 'F', 'A'}, optional, default='C'
        The order of the array. (see numpy)

    Returns
    -------
    BoltArrayLocal
    """
    # convert the input first, then wrap the converted data
    data = asarray(a, dtype, order)
    return BoltArrayLocal(data)
def tolocal(self):
    """
    Return a local bolt array.

    The data is first collected through ``toarray`` and the result is
    then wrapped.

    Returns
    -------
    BoltArrayLocal
    """
    from bolt.local.array import BoltArrayLocal

    collected = self.toarray()
    return BoltArrayLocal(collected)
def first(self):
    """
    Return the first element of an array.

    Takes the first value of the underlying RDD and wraps it.

    Returns
    -------
    BoltArrayLocal
    """
    from bolt.local.array import BoltArrayLocal

    values = self._rdd.values()
    head = values.first()
    return BoltArrayLocal(head)
def _wrap(func, shape, dtype, order):
    """
    Call a constructor function and wrap its result as a local bolt array.

    Parameters
    ----------
    func : function
        Called positionally as ``func(shape, dtype, order)``.

    shape, dtype, order
        Passed through to ``func`` unchanged.

    Returns
    -------
    BoltArrayLocal
    """
    built = func(shape, dtype, order)
    return BoltArrayLocal(built)
def tolocal(self):
    """
    Return a local bolt array built from the result of ``toarray``.

    Returns
    -------
    BoltArrayLocal
    """
    from bolt.local.array import BoltArrayLocal

    data = self.toarray()
    return BoltArrayLocal(data)