def __div__(self, arry):
    """
    Divide this array by another array (arry) element-wise.
    Always uses true division.

    Parameters
    ----------
    arry : ndarray, BoltArrayLocal, or BoltArraySpark
        Another array to divide by element-wise

    Returns
    -------
    BoltArraySpark
    """
    if isinstance(arry, ndarray):
        # wrap a local ndarray as a BoltArraySpark with the same split
        from bolt.spark.construct import ConstructSpark
        arry = ConstructSpark.array(arry, self._rdd.context, axis=range(0, self.split))
    else:
        if not isinstance(arry, BoltArraySpark):
            raise ValueError("other must be local array or spark array, got %s" % type(arry))

    if not all([x == y for (x, y) in zip(self.shape, arry.shape)]):
        raise ValueError("All the input array dimensions must match exactly")

    # join the two RDDs on their keys and divide the value blocks;
    # true_divide keeps integer inputs from falling back to floor division
    from numpy import true_divide
    rdd = self._rdd.join(arry._rdd).mapValues(lambda x: true_divide(x[0], x[1]))
    return self._constructor(rdd).__finalize__(self)
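# A minimal usage sketch for element-wise division (illustrative only: it
# assumes a live SparkContext named `sc` and `bolt.array` as the public
# constructor; under Python 2 the `/` operator routes to __div__):
#
#     from numpy import arange, ones
#     from bolt import array
#
#     a = array(arange(24).reshape(2, 3, 4), sc, axis=(0,))
#     b = array(2 * ones((2, 3, 4)), sc, axis=(0,))
#     (a / b).toarray()   # RDDs are joined on keys, value blocks divided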
def concatenate(self, arry, axis=0):
    """
    Join this array with another array.

    Parameters
    ----------
    arry : ndarray, BoltArrayLocal, or BoltArraySpark
        Another array to concatenate with

    axis : int, optional, default=0
        The axis along which arrays will be joined.

    Returns
    -------
    BoltArraySpark
    """
    if isinstance(arry, ndarray):
        from bolt.spark.construct import ConstructSpark
        arry = ConstructSpark.array(arry, self._rdd.context, axis=range(0, self.split))
    else:
        if not isinstance(arry, BoltArraySpark):
            raise ValueError("other must be local array or spark array, got %s" % type(arry))

    if not all([x == y if not i == axis else True
                for i, (x, y) in enumerate(zip(self.shape, arry.shape))]):
        raise ValueError("all the input array dimensions except for "
                         "the concatenation axis must match exactly")

    if not self.split == arry.split:
        raise NotImplementedError("two arrays must have the same split")

    if axis < self.split:
        # concatenating along a key axis: shift the other array's keys
        # past this array's extent, then union the two RDDs
        shape = self.keys.shape

        def key_func(key):
            key = list(key)
            key[axis] += shape[axis]
            return tuple(key)

        rdd = self._rdd.union(arry._rdd.map(lambda kv: (key_func(kv[0]), kv[1])))

    else:
        # concatenating along a value axis: join on keys and concatenate
        # the local value blocks along the corresponding local axis
        from numpy import concatenate as npconcatenate
        shift = axis - self.split
        rdd = self._rdd.join(arry._rdd).map(lambda kv: (kv[0], npconcatenate(kv[1], axis=shift)))

    shape = tuple([x + y if i == axis else x
                   for i, (x, y) in enumerate(zip(self.shape, arry.shape))])

    return self._constructor(rdd, shape=shape).__finalize__(self)
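# Illustrative sketch of the two concatenation paths (assumes a SparkContext
# `sc`; names and shapes are examples, not part of this module):
#
#     from numpy import arange
#     from bolt import array
#
#     x = array(arange(24).reshape(2, 3, 4), sc, axis=(0,))   # split == 1
#
#     # axis < split: the second array's keys are shifted and the RDDs
#     # are unioned, giving shape (4, 3, 4)
#     x.concatenate(x, axis=0).shape
#
#     # axis >= split: records are joined on keys and the local value
#     # blocks are concatenated, giving shape (2, 6, 4)
#     x.concatenate(x, axis=1).shape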
def array(a, context=None, axis=(0,), dtype=None, npartitions=None):
    # delegate to the spark constructor; the passed-in `context` is accepted
    # for signature compatibility but replaced with a LocalSparkContext
    return cs.array(a, context=LocalSparkContext(), axis=axis, dtype=dtype,
                    npartitions=npartitions)
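# Hypothetical call pattern for this helper (the `context` argument can be
# omitted or passed through; either way a LocalSparkContext is used):
#
#     from numpy import arange
#
#     a = array(arange(24).reshape(2, 3, 4), axis=(0,))
#     a.shape   # (2, 3, 4)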