Example #1
0
    def swap(self, key_axes, value_axes, size=150):
        """
        Exchange axes between the keys and the values of this array.

        Parameters
        ----------
        key_axes : int or sequence of int
            Key axes taking part in the swap; normalised with ``tupleize``
            and handed to ``Dims`` together with the key shape. The split
            of the result shrinks by the number of axes given here.

        value_axes : int or sequence of int
            Value axes taking part in the swap; normalised with ``tupleize``
            and handed to ``Dims`` together with the value shape. The split
            of the result grows by the number of axes given here.

        size : optional, default 150
            Forwarded unchanged to ``Swapper``
            (presumably a chunk-size hint -- confirm against ``Swapper``).

        Returns
        -------
        ``self`` when both ``key_axes`` and ``value_axes`` are empty,
        otherwise a new object built with ``self._constructor`` from the
        swapped records, the shape reported by the swapper and the new split.

        Raises
        ------
        ValueError
            If every key axis is selected and no value axis is, since all
            data would end up on a single key.
        """
        key_axes, value_axes = tupleize(key_axes), tupleize(value_axes)

        if len(key_axes) == self.keys.ndim and len(value_axes) == 0:
            raise ValueError('Cannot perform a swap that would '
                             'end up with all data on a single key')

        # nothing to move: hand back the same object untouched
        if len(key_axes) == 0 and len(value_axes) == 0:
            return self

        # 0-d values get a temporary trailing axis of length one so the
        # swapper always works with values that have at least one dimension
        scalar_values = self.values.ndim == 0
        if scalar_values:
            rdd = self._rdd.mapValues(lambda v: array(v, ndmin=1))
            value_shape = (1,)
        else:
            rdd = self._rdd
            value_shape = self.values.shape

        from bolt.spark.swap import Swapper, Dims

        k = Dims(shape=self.keys.shape, axes=key_axes)
        v = Dims(shape=value_shape, axes=value_axes)
        s = Swapper(k, v, self.dtype, size)

        chunks = s.chunk(rdd)
        rdd = s.extract(chunks)

        shape = s.getshape()
        split = self.split - len(key_axes) + len(value_axes)

        if scalar_values:
            # Remove only the temporary trailing axis. The shape below drops
            # exactly its last entry, so the records must drop exactly that
            # axis too; a bare squeeze() would also collapse any swapped-in
            # axis of length one and leave records that disagree with the
            # reported shape.
            rdd = rdd.mapValues(lambda v: v.squeeze(axis=-1))
            shape = shape[:-1]

        return self._constructor(rdd, shape=tuple(shape), split=split)
Example #2
0
    def swap(self, key_axes, value_axes, size=150):
        """
        Exchange axes between the keys and the values of this array.

        Parameters
        ----------
        key_axes : int or sequence of int
            Key axes taking part in the swap; normalised with ``tupleize``
            and handed to ``Dims`` together with the key shape. The split
            of the result shrinks by the number of axes given here.

        value_axes : int or sequence of int
            Value axes taking part in the swap; normalised with ``tupleize``
            and handed to ``Dims`` together with the value shape. The split
            of the result grows by the number of axes given here.

        size : optional, default 150
            Forwarded unchanged to ``Swapper``
            (presumably a chunk-size hint -- confirm against ``Swapper``).

        Returns
        -------
        ``self`` when both ``key_axes`` and ``value_axes`` are empty,
        otherwise a new object built with ``self._constructor`` from the
        swapped records, the shape reported by the swapper and the new split.

        Raises
        ------
        ValueError
            If every key axis is selected and no value axis is, since all
            data would end up on a single key.
        """
        key_axes, value_axes = tupleize(key_axes), tupleize(value_axes)

        if len(key_axes) == self.keys.ndim and len(value_axes) == 0:
            raise ValueError('Cannot perform a swap that would '
                             'end up with all data on a single key')

        # nothing to move: hand back the same object untouched
        if len(key_axes) == 0 and len(value_axes) == 0:
            return self

        # 0-d values get a temporary trailing axis of length one so the
        # swapper always works with values that have at least one dimension
        scalar_values = self.values.ndim == 0
        if scalar_values:
            rdd = self._rdd.mapValues(lambda v: array(v, ndmin=1))
            value_shape = (1, )
        else:
            rdd = self._rdd
            value_shape = self.values.shape

        from bolt.spark.swap import Swapper, Dims

        k = Dims(shape=self.keys.shape, axes=key_axes)
        v = Dims(shape=value_shape, axes=value_axes)
        s = Swapper(k, v, self.dtype, size)

        chunks = s.chunk(rdd)
        rdd = s.extract(chunks)

        shape = s.getshape()
        split = self.split - len(key_axes) + len(value_axes)

        if scalar_values:
            # Remove only the temporary trailing axis. The shape below drops
            # exactly its last entry, so the records must drop exactly that
            # axis too; a bare squeeze() would also collapse any swapped-in
            # axis of length one and leave records that disagree with the
            # reported shape.
            rdd = rdd.mapValues(lambda v: v.squeeze(axis=-1))
            shape = shape[:-1]

        return self._constructor(rdd, shape=tuple(shape), split=split)
Example #3
0
    def chunk(self, key_axes, value_axes, size):
        """
        Chunk the underlying records with a ``Swapper``.

        Parameters
        ----------
        key_axes : sequence
            Axes passed to ``Dims`` alongside the key shape.

        value_axes : sequence
            Axes passed to ``Dims`` alongside the value shape.

        size
            Forwarded unchanged to ``Swapper``.

        Returns
        -------
        ``self`` when both axis sequences are empty, otherwise whatever
        ``Swapper.chunk`` returns for this object's records.
        """
        no_axes_requested = len(key_axes) == 0 and len(value_axes) == 0
        if no_axes_requested:
            # nothing to chunk over; note the caller gets this object back
            return self

        from bolt.spark.swap import Swapper, Dims

        swapper = Swapper(
            Dims(shape=self.keys.shape, axes=key_axes),
            Dims(shape=self.values.shape, axes=value_axes),
            self.dtype,
            size,
        )
        return swapper.chunk(self._rdd)
Example #4
0
    def chunk(self, key_axes, value_axes, size):
        """
        Chunk the underlying records with a ``Swapper``.

        Parameters
        ----------
        key_axes : sequence
            Axes passed to ``Dims`` alongside the key shape.

        value_axes : sequence
            Axes passed to ``Dims`` alongside the value shape.

        size
            Forwarded unchanged to ``Swapper``.

        Returns
        -------
        ``self`` when both axis sequences are empty, otherwise whatever
        ``Swapper.chunk`` returns for this object's records.
        """
        no_axes_requested = len(key_axes) == 0 and len(value_axes) == 0
        if no_axes_requested:
            # nothing to chunk over; note the caller gets this object back
            return self

        from bolt.spark.swap import Swapper, Dims

        swapper = Swapper(
            Dims(shape=self.keys.shape, axes=key_axes),
            Dims(shape=self.values.shape, axes=value_axes),
            self.dtype,
            size,
        )
        return swapper.chunk(self._rdd)