コード例 #1
0
ファイル: array.py プロジェクト: kr-hansen/bolt
    def __div__(self, arry):
        """
        Divide this array by another array (arry) element-wise.

        The quotient is computed with the ``/`` operator on each pair of
        values that share a key.
        NOTE(review): ``/`` is true division on Python 3; on Python 2 it is
        only true division if ``from __future__ import division`` is active
        in this module -- confirm.

        Parameters
        ----------
        arry : ndarray, BoltArrayLocal, or BoltArraySpark
            Another array to divide by element-wise. An ndarray instance is
            first distributed on this array's Spark context, keyed along the
            same leading axes as this array.

        Returns
        -------
        BoltArraySpark

        Raises
        ------
        ValueError
            If arry is neither an ndarray instance nor a BoltArraySpark, or
            if the two shapes are not identical.
        NotImplementedError
            If the two arrays do not have the same split.
        """
        if isinstance(arry, ndarray):
            from bolt.spark.construct import ConstructSpark
            arry = ConstructSpark.array(arry,
                                        self._rdd.context,
                                        axis=range(0, self.split))
        else:
            if not isinstance(arry, BoltArraySpark):
                raise ValueError(
                    "other must be local array or spark array, got %s" %
                    type(arry))

        # Compare the complete shapes. Pairing the dimensions with zip() stops
        # at the shorter shape, so arrays with a different number of
        # dimensions could slip through as "matching".
        if tuple(self.shape) != tuple(arry.shape):
            raise ValueError(
                "All the input array dimensions must match exactly")

        # The join below pairs records by key, so both arrays need the same
        # key layout; otherwise no keys match and the result is silently empty.
        if not self.split == arry.split:
            raise NotImplementedError("two arrays must have the same split")

        rdd = self._rdd.join(arry._rdd).mapValues(lambda x: x[0] / x[1])
        # NOTE(review): __finalize__ presumably carries metadata over from
        # self; confirm it is still accurate when the quotient's dtype
        # differs from the dividend's.
        return self._constructor(rdd).__finalize__(self)
コード例 #2
0
    def concatenate(self, arry, axis=0):
        """
        Join this array with another array along an existing axis.

        Parameters
        ----------
        arry : ndarray, BoltArrayLocal, or BoltArraySpark
            Another array to concatenate with. An ndarray instance is first
            distributed on this array's Spark context, keyed along the same
            leading axes as this array.

        axis : int, optional, default=0
            The axis along which arrays will be joined. A negative value
            counts from the last axis.

        Returns
        -------
        BoltArraySpark

        Raises
        ------
        ValueError
            If arry is neither an ndarray instance nor a BoltArraySpark, if
            axis is out of bounds, or if the shapes differ anywhere other
            than along the concatenation axis.
        NotImplementedError
            If the two arrays do not have the same split.
        """
        if isinstance(arry, ndarray):
            from bolt.spark.construct import ConstructSpark
            arry = ConstructSpark.array(arry,
                                        self._rdd.context,
                                        axis=range(0, self.split))
        else:
            if not isinstance(arry, BoltArraySpark):
                raise ValueError(
                    "other must be local array or spark array, got %s" %
                    type(arry))

        # Validate and normalise the axis up front so a bad value fails here
        # instead of shifting the wrong key position or failing inside a job.
        ndim = len(self.shape)
        if not -ndim <= axis < ndim:
            raise ValueError(
                "axis %s is out of bounds for array of dimension %s" %
                (axis, ndim))
        if axis < 0:
            axis += ndim

        # Check the number of dimensions explicitly: zip() stops at the
        # shorter shape and would otherwise hide a mismatch.
        if len(arry.shape) != ndim or not all(
                x == y
                for i, (x, y) in enumerate(zip(self.shape, arry.shape))
                if i != axis):
            raise ValueError("all the input array dimensions except for "
                             "the concatenation axis must match exactly")

        if not self.split == arry.split:
            raise NotImplementedError("two arrays must have the same split ")

        if axis < self.split:
            # Concatenating along a key axis: move the other array's keys
            # past the end of this array's keys, then merge the records.
            # The offset gets its own local because `shape` is rebound
            # below and a closure would observe that rebinding.
            offset = self.keys.shape[axis]

            def key_func(key):
                key = list(key)
                key[axis] += offset
                return tuple(key)

            rdd = self._rdd.union(
                arry._rdd.map(lambda kv: (key_func(kv[0]), kv[1])))

        else:
            # Concatenating along a value axis: pair the values that share a
            # key and join each pair along the axis relative to the values.
            from numpy import concatenate as npconcatenate
            shift = axis - self.split
            rdd = self._rdd.join(arry._rdd).map(
                lambda kv: (kv[0], npconcatenate(kv[1], axis=shift)))

        shape = tuple([
            x + y if i == axis else x
            for i, (x, y) in enumerate(zip(self.shape, arry.shape))
        ])

        return self._constructor(rdd, shape=shape).__finalize__(self)
コード例 #3
0
ファイル: array.py プロジェクト: andrewosh/bolt
    def concatenate(self, arry, axis=0):
        """
        Join this array with another array along an existing axis.

        Parameters
        ----------
        arry : ndarray, BoltArrayLocal, or BoltArraySpark
            Another array to concatenate with. An ndarray instance is first
            distributed on this array's Spark context, keyed along the same
            leading axes as this array.

        axis : int, optional, default=0
            The axis along which arrays will be joined. A negative value
            counts from the last axis.

        Returns
        -------
        BoltArraySpark

        Raises
        ------
        ValueError
            If arry is neither an ndarray instance nor a BoltArraySpark, if
            axis is out of bounds, or if the shapes differ anywhere other
            than along the concatenation axis.
        NotImplementedError
            If the two arrays do not have the same split.
        """
        if isinstance(arry, ndarray):
            from bolt.spark.construct import ConstructSpark
            arry = ConstructSpark.array(arry, self._rdd.context, axis=range(0, self.split))
        else:
            if not isinstance(arry, BoltArraySpark):
                raise ValueError("other must be local array or spark array, got %s" % type(arry))

        # Reject or normalise the axis before it is used as an index, so a
        # negative or too-large value cannot shift the wrong key position.
        ndim = len(self.shape)
        if not -ndim <= axis < ndim:
            raise ValueError("axis %s is out of bounds for array of dimension %s" % (axis, ndim))
        if axis < 0:
            axis += ndim

        # zip() truncates to the shorter shape, so the dimension count has to
        # be compared separately from the per-dimension sizes.
        same_ndim = len(arry.shape) == ndim
        if not same_ndim or any(x != y for i, (x, y) in enumerate(zip(self.shape, arry.shape))
                                if i != axis):
            raise ValueError("all the input array dimensions except for "
                             "the concatenation axis must match exactly")

        if not self.split == arry.split:
            raise NotImplementedError("two arrays must have the same split ")

        if axis < self.split:
            # Key axis: offset the other array's keys by this array's extent
            # along that axis and merge both sets of records. The extent is
            # held in a dedicated local because `shape` is reassigned below.
            offset = self.keys.shape[axis]

            def key_func(key):
                key = list(key)
                key[axis] += offset
                return tuple(key)

            rdd = self._rdd.union(arry._rdd.map(lambda kv: (key_func(kv[0]), kv[1])))

        else:
            # Value axis: join records by key and concatenate each pair of
            # values along the axis expressed relative to the values.
            from numpy import concatenate as npconcatenate
            shift = axis - self.split
            rdd = self._rdd.join(arry._rdd).map(lambda kv: (kv[0], npconcatenate(kv[1], axis=shift)))

        shape = tuple([x + y if i == axis else x
                      for i, (x, y) in enumerate(zip(self.shape, arry.shape))])

        return self._constructor(rdd, shape=shape).__finalize__(self)
コード例 #4
0
 def array(a, context=None, axis=(0, ), dtype=None, npartitions=None):
     """
     Build an array through ``cs.array`` using a new ``LocalSparkContext``.

     The ``context`` argument is accepted but never read: every call passes
     a freshly created ``LocalSparkContext()`` instead. ``a``, ``axis``,
     ``dtype`` and ``npartitions`` are forwarded unchanged, and whatever
     ``cs.array`` returns is returned as is.
     """
     options = dict(axis=axis, dtype=dtype, npartitions=npartitions)
     # Always substitute a local context, regardless of what was passed in.
     options["context"] = LocalSparkContext()
     return cs.array(a, **options)