コード例 #1
0
ファイル: test_sparse.py プロジェクト: wycharry/mars-1
    def testSparseMinimum(self):
        """``minimum`` on two wrapped matrices equals ``minimum`` on the raw fixtures."""
        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)

        actual = lhs.minimum(rhs).toarray()
        expected = self.s1.minimum(self.s2).toarray()
        np.testing.assert_array_equal(actual, expected)
コード例 #2
0
ファイル: test_sparse.py プロジェクト: Haxine/mars-1
    def testSparseSubtract(self):
        """Check ``-`` on ``SparseNDArray`` matrices and vectors.

        Each result is compared with the same subtraction carried out on the
        raw ``self.s1`` / ``self.s2`` / ``self.v1`` fixtures, or on
        ``self.v1_data`` for the vector cases.
        """
        def same_pattern(raw, values):
            # CSR matrix reusing ``raw``'s indices/indptr/shape with new values.
            return sps.csr_matrix((values, raw.indices, raw.indptr), raw.shape)

        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        check(lhs - rhs, self.s1 - self.s2)
        check(lhs - self.d1, self.s1 - self.d1)
        check(self.d1 - lhs, self.d1 - self.s1)
        # Scalar operands: the expectation changes only the stored entries.
        expected = same_pattern(self.s1, self.s1.data - 1)
        check(lhs - 1, expected)
        expected = same_pattern(self.s1, 1 - self.s1.data)
        check(1 - lhs, expected)

        # sparse vector operands
        vec = SparseNDArray(self.v1, shape=(3, ))
        check(vec - vec, self.v1_data - self.v1_data)
        check(vec - self.d1, self.v1_data - self.d1)
        check(self.d1 - vec, self.d1 - self.v1_data)
        expected = same_pattern(self.v1, self.v1.data - 1)
        check(vec - 1, expected.toarray().reshape(3))
        expected = same_pattern(self.v1, 1 - self.v1.data)
        check(1 - vec, expected.toarray().reshape(3))
コード例 #3
0
ファイル: test_sparse.py プロジェクト: Haxine/mars-1
    def testSparseDot(self):
        """Check ``dot`` and ``tensordot`` for matrix and vector operands.

        Expected values are computed with ``dot`` on the raw fixtures
        (``self.s1``, ``self.s2``, ``self.d1``, ``self.v1_data``,
        ``self.v2_data``).
        """
        mat_a = SparseNDArray(self.s1)
        mat_b = SparseNDArray(self.s2)
        vec3 = SparseNDArray(self.v1, shape=(3, ))
        vec2 = SparseNDArray(self.v2, shape=(2, ))
        check = self.assertArrayEqual

        # matrix-matrix products through ``dot``
        check(mls.dot(mat_a, mat_b.T), self.s1.dot(self.s2.T))
        check(mat_a.dot(self.d1), self.s1.dot(self.d1))
        check(self.d1.dot(mat_a.T),
              self.d1.dot(self.s1.T.toarray()))

        # the same products expressed through ``tensordot``
        check(mls.tensordot(mat_a, mat_b.T, axes=(1, 0)),
              self.s1.dot(self.s2.T))
        check(mls.tensordot(mat_a, self.d1, axes=(1, -1)),
              self.s1.dot(self.d1))
        check(mls.tensordot(self.d1, mat_a.T, axes=(0, 0)),
              self.d1.dot(self.s1.T.toarray()))

        # products involving sparse vectors
        check(mls.dot(mat_a, vec3), self.s1.dot(self.v1_data))
        check(mls.dot(mat_b, vec3), self.s2.dot(self.v1_data))
        check(mls.dot(vec2, mat_a), self.v2_data.dot(self.s1.A))
        check(mls.dot(vec2, mat_b), self.v2_data.dot(self.s2.A))
        check(mls.dot(vec3, vec3), self.v1_data.dot(self.v1_data))
        check(mls.dot(vec2, vec2), self.v2_data.dot(self.v2_data))

        # explicit ``sparse=False`` calls
        check(mls.dot(vec2, mat_a, sparse=False),
              self.v2_data.dot(self.s1.A))
        check(mls.dot(vec3, vec3, sparse=False),
              self.v1_data.dot(self.v1_data))
コード例 #4
0
ファイル: test_sparse.py プロジェクト: Haxine/mars-1
    def testSparseAdd(self):
        """Check ``+`` on ``SparseNDArray`` matrices and vectors.

        Each result is compared with the same addition carried out on the
        raw ``self.s1`` / ``self.s2`` / ``self.v1`` fixtures, or on
        ``self.v1_data`` for the vector cases.
        """
        def same_pattern(raw, values):
            # CSR matrix reusing ``raw``'s indices/indptr/shape with new values.
            return sps.csr_matrix((values, raw.indices, raw.indptr), raw.shape)

        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        check(lhs + rhs, self.s1 + self.s2)
        check(lhs + self.d1, self.s1 + self.d1)
        check(self.d1 + lhs, self.d1 + self.s1)
        # Scalar operands: the expectation changes only the stored entries.
        expected = same_pattern(self.s1, self.s1.data + 1)
        check(lhs + 1, expected)
        expected = same_pattern(self.s1, 1 + self.s1.data)
        check(1 + lhs, expected)

        # sparse vector operands
        vec = SparseNDArray(self.v1, shape=(3, ))
        check(vec + vec, self.v1_data + self.v1_data)
        check(vec + self.d1, self.v1_data + self.d1)
        check(self.d1 + vec, self.d1 + self.v1_data)
        expected = same_pattern(self.v1, self.v1.data + 1)
        check(vec + 1, expected.toarray().reshape(3))
        expected = same_pattern(self.v1, 1 + self.v1.data)
        check(1 + vec, expected.toarray().reshape(3))
コード例 #5
0
ファイル: test_sparse.py プロジェクト: Haxine/mars-1
    def testSparseBin(self):
        """Compare each binary function in ``mls`` with its ``numpy`` namesake.

        Matrix operands are checked against NumPy applied to the densified
        fixtures; scalar operands are checked against a CSR matrix whose
        stored values went through the NumPy function.
        """
        mat_a = SparseNDArray(self.s1)
        mat_b = SparseNDArray(self.s2)
        vec = SparseNDArray(self.v1, shape=(3, ))
        check = self.assertArrayEqual

        names = ('fmod', 'logaddexp', 'logaddexp2', 'equal', 'not_equal',
                 'less', 'less_equal', 'greater', 'greater_equal',
                 'hypot', 'arctan2')
        for name in names:
            sparse_fn = getattr(mls, name)
            numpy_fn = getattr(np, name)

            check(sparse_fn(mat_a, mat_b),
                  numpy_fn(self.s1.toarray(), self.s2.toarray()))
            check(sparse_fn(mat_a, self.d1),
                  numpy_fn(self.s1.toarray(), self.d1))
            check(sparse_fn(self.d1, mat_a),
                  numpy_fn(self.d1, self.s1.toarray()))

            # scalar as the right operand
            expected = sps.csr_matrix(
                (numpy_fn(self.s1.data, 2), self.s1.indices, self.s1.indptr),
                self.s1.shape)
            check(sparse_fn(mat_a, 2), expected)
            # scalar as the left operand
            expected = sps.csr_matrix(
                (numpy_fn(2, self.s1.data), self.s1.indices, self.s1.indptr),
                self.s1.shape)
            check(sparse_fn(2, mat_a), expected)

            # sparse vector operands
            check(sparse_fn(vec, vec), numpy_fn(self.v1_data, self.v1_data))
            check(sparse_fn(vec, self.d1), numpy_fn(self.v1_data, self.d1))
            check(sparse_fn(self.d1, vec), numpy_fn(self.d1, self.v1_data))
            check(sparse_fn(vec, 2), numpy_fn(self.v1_data, 2))
            check(sparse_fn(2, vec), numpy_fn(2, self.v1_data))
コード例 #6
0
ファイル: test_sparse.py プロジェクト: wycharry/mars-1
    def testSparseAdd(self):
        """Check ``+`` on wrapped matrices against the raw fixtures."""
        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        # matrix operands in every order exercised by the test
        check(lhs + rhs, self.s1 + self.s2)
        check(lhs + self.d1, self.s1 + self.d1)
        check(self.d1 + lhs, self.d1 + self.s1)
        # scalar on either side, compared with the densified fixture plus one
        check(lhs + 1, self.s1.toarray() + 1)
        check(1 + lhs, self.s1.toarray() + 1)
コード例 #7
0
ファイル: test_sparse.py プロジェクト: wycharry/mars-1
    def testSparseSubtract(self):
        """Check ``-`` on wrapped matrices against the raw fixtures."""
        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        # matrix operands in every order exercised by the test
        check(lhs - rhs, self.s1 - self.s2)
        check(lhs - self.d1, self.s1 - self.d1)
        check(self.d1 - lhs, self.d1 - self.s1)
        # scalar on either side, compared with the densified fixture
        check(lhs - 1, self.s1.toarray() - 1)
        check(1 - lhs, 1 - self.s1.toarray())
コード例 #8
0
    def testSparseMultiply(self):
        """Check ``*`` on wrapped matrices against ``multiply`` on the raw fixtures."""
        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        # ``*`` on the wrapper is compared with ``multiply`` on the fixture
        check(lhs * rhs, self.s1.multiply(self.s2))
        check(lhs * self.d1, self.s1.multiply(self.d1))
        check(self.d1 * lhs, self.s1.multiply(self.d1))
        # scalar on either side
        check(lhs * 2, self.s1 * 2)
        check(2 * lhs, self.s1 * 2)
コード例 #9
0
ファイル: test_sparse.py プロジェクト: qinxuye/mars
def test_sparse_sum():
    """Check ``SparseNDArray.sum`` against sums taken on the raw fixtures.

    Covers the full reduction and both axis reductions of the matrix built
    from ``s1_data``, plus the full reduction of the vector built from ``v1``.
    """
    s1 = SparseNDArray(s1_data)
    v = SparseNDArray(v1, shape=(3, ))
    # Expected values come from the raw ``s1_data`` fixture rather than from
    # the wrapper under test; comparing ``s1.sum()`` with itself cannot fail.
    assert s1.sum() == s1_data.sum()
    np.testing.assert_array_equal(s1.sum(axis=1),
                                  np.asarray(s1_data.sum(axis=1)).reshape(2))
    np.testing.assert_array_equal(s1.sum(axis=0),
                                  np.asarray(s1_data.sum(axis=0)).reshape(3))
    np.testing.assert_array_equal(v.sum(), np.asarray(v1_data.sum()))
コード例 #10
0
    def testSparseFloorDivide(self):
        """Check ``//`` on wrapped matrices against the densified fixtures."""
        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        # matrix operands in every order exercised by the test
        check(lhs // rhs, self.s1.toarray() // self.s2.toarray())
        check(lhs // self.d1, self.s1.toarray() // self.d1)
        check(self.d1 // lhs, self.d1 // self.s1.toarray())
        # scalar on either side
        check(lhs // 2, self.s1.toarray() // 2)
        check(2 // lhs, 2 // self.s1.toarray())
コード例 #11
0
    def testSparsePower(self):
        """Check ``**`` on wrapped matrices against the raw fixtures."""
        base = SparseNDArray(self.s1)
        exponent = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        # matrix operands in every order exercised by the test
        check(base ** exponent, self.s1.toarray() ** self.s2.toarray())
        check(base ** self.d1, self.s1.toarray() ** self.d1)
        check(self.d1 ** base, self.d1 ** self.s1.toarray())
        # scalar exponent is compared with the fixture's own ``power`` method
        check(base ** 2, self.s1.power(2))
        check(2 ** base, 2 ** self.s1.toarray())
コード例 #12
0
    def testSparseMod(self):
        """Check ``%`` on wrapped matrices against the densified fixtures."""
        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        # matrix operands in every order exercised by the test
        check(lhs % rhs, self.s1.toarray() % self.s2.toarray())
        check(lhs % self.d1, self.s1.toarray() % self.d1)
        check(self.d1 % lhs, self.d1 % self.s1.toarray())
        # scalar on either side
        check(lhs % 2, self.s1.toarray() % 2)
        check(2 % lhs, 2 % self.s1.toarray())
コード例 #13
0
ファイル: test_sparse.py プロジェクト: qinxuye/mars
def test_sparse_minimum():
    """Check ``minimum`` for a sparse matrix pair and for a sparse vector.

    The matrix result is compared with ``minimum`` on the raw ``s1_data`` /
    ``s2_data`` fixtures; the vector result with ``np.minimum`` on
    ``v1_data`` and ``d1``.
    """
    s1 = SparseNDArray(s1_data)
    s2 = SparseNDArray(s2_data)

    # The expected side must not be produced by the wrapper under test,
    # otherwise the assertion compares a value with itself.
    np.testing.assert_array_equal(
        s1.minimum(s2).toarray(),
        s1_data.minimum(s2_data).toarray())

    v = SparseVector(v1, shape=(3, ))
    np.testing.assert_array_equal(v.minimum(d1), np.minimum(v1_data, d1))
コード例 #14
0
    def testEuclideanDistancesExecution(self):
        """Execute ``euclidean_distances`` on chunked tensors and compare with
        ``sk_euclidean_distances`` on the raw inputs.

        The same four scenarios run for a dense input pair and for a
        ``SparseNDArray`` input pair: default arguments, caller-supplied
        ``X_norm_squared``/``Y_norm_squared``, float32 inputs with
        ``squared=True``, and a single-argument call.
        """
        dense_raw_x = np.random.rand(30, 10)
        dense_raw_y = np.random.rand(40, 10)
        sparse_raw_x = SparseNDArray(
            sps.random(30, 10, density=0.5, format='csr'))
        sparse_raw_y = SparseNDArray(
            sps.random(40, 10, density=0.5, format='csr'))

        for raw_x, raw_y in [(dense_raw_x, dense_raw_y),
                             (sparse_raw_x, sparse_raw_y)]:
            # Chunk sizes differ between x and y (9 vs 7).
            x = mt.tensor(raw_x, chunk_size=9)
            y = mt.tensor(raw_y, chunk_size=7)

            # scenario 1: default arguments
            distance = euclidean_distances(x, y)

            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(raw_x, Y=raw_y)
            np.testing.assert_almost_equal(result, expected)

            # scenario 2: caller-supplied norm arguments
            # NOTE(review): these are plain row sums, not sums of squares;
            # both implementations receive the same values, so presumably only
            # the argument plumbing is being compared - confirm this is intended.
            x_norm = x.sum(axis=1)[..., np.newaxis]
            y_norm = y.sum(axis=1)[np.newaxis, ...]
            distance = euclidean_distances(x,
                                           y,
                                           X_norm_squared=x_norm,
                                           Y_norm_squared=y_norm)
            x_raw_norm = raw_x.sum(axis=1)[..., np.newaxis]
            y_raw_norm = raw_y.sum(axis=1)[np.newaxis, ...]

            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(raw_x,
                                              raw_y,
                                              X_norm_squared=x_raw_norm,
                                              Y_norm_squared=y_raw_norm)
            np.testing.assert_almost_equal(result, expected)

            # scenario 3: float32 inputs with squared=True
            x_sq = (x**2).astype(np.float32)
            y_sq = (y**2).astype(np.float32)

            distance = euclidean_distances(x_sq, y_sq, squared=True)

            x_raw_sq = (raw_x**2).astype(np.float32)
            y_raw_sq = (raw_y**2).astype(np.float32)

            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(x_raw_sq, y_raw_sq, squared=True)
            # compared to 6 decimals here, unlike the other scenarios
            np.testing.assert_almost_equal(result, expected, decimal=6)

            # test x is y
            distance = euclidean_distances(x)

            result = self.executor.execute_tensor(distance, concat=True)[0]
            expected = sk_euclidean_distances(raw_x)

            np.testing.assert_almost_equal(result, expected)
コード例 #15
0
ファイル: test_sparse.py プロジェクト: Haxine/mars-1
    def testSparseMinimum(self):
        """Check ``minimum`` for a sparse matrix pair and for a sparse vector."""
        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)

        # matrix case: compare with ``minimum`` on the raw fixtures
        actual = lhs.minimum(rhs).toarray()
        expected = self.s1.minimum(self.s2).toarray()
        np.testing.assert_array_equal(actual, expected)

        # vector case: compare with ``np.minimum`` on ``v1_data`` and ``d1``
        vec = SparseVector(self.v1, shape=(3, ))
        actual = vec.minimum(self.d1)
        expected = np.minimum(self.v1_data, self.d1)
        np.testing.assert_array_equal(actual, expected)
コード例 #16
0
ファイル: test_sparse.py プロジェクト: Haxine/mars-1
 def testSparseSum(self):
     """Check ``sum`` (full and per-axis) against sums on the raw fixtures."""
     mat = SparseNDArray(self.s1)
     vec = SparseNDArray(self.v1, shape=(3, ))

     # full reduction of the matrix
     self.assertEqual(mat.sum(), self.s1.sum())

     # per-axis reductions; the fixture's result is flattened to 1-d
     row_sums = mat.sum(axis=1)
     expected_rows = np.asarray(self.s1.sum(axis=1)).reshape(2)
     np.testing.assert_array_equal(row_sums, expected_rows)

     col_sums = mat.sum(axis=0)
     expected_cols = np.asarray(self.s1.sum(axis=0)).reshape(3)
     np.testing.assert_array_equal(col_sums, expected_cols)

     # full reduction of the vector
     np.testing.assert_array_equal(vec.sum(), np.asarray(self.v1_data.sum()))
コード例 #17
0
    def testSparseBin(self):
        """Compare each binary function in ``mls`` with its ``numpy`` namesake
        applied to the densified fixtures."""
        mat_a = SparseNDArray(self.s1)
        mat_b = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        names = ('fmod', 'logaddexp', 'logaddexp2', 'equal', 'not_equal',
                 'less', 'less_equal', 'greater', 'greater_equal', 'hypot')
        for name in names:
            sparse_fn = getattr(mls, name)
            numpy_fn = getattr(np, name)

            # matrix operands in every order exercised by the test
            check(sparse_fn(mat_a, mat_b),
                  numpy_fn(self.s1.toarray(), self.s2.toarray()))
            check(sparse_fn(mat_a, self.d1),
                  numpy_fn(self.s1.toarray(), self.d1))
            check(sparse_fn(self.d1, mat_a),
                  numpy_fn(self.d1, self.s1.toarray()))
            # scalar on either side
            check(sparse_fn(mat_a, 2), numpy_fn(self.s1.toarray(), 2))
            check(sparse_fn(2, mat_a), numpy_fn(2, self.s1.toarray()))
コード例 #18
0
ファイル: test_sparse.py プロジェクト: qinxuye/mars
def test_sparse_creation():
    """Check what ``SparseNDArray`` builds for matrix and vector inputs.

    A matrix input must yield a 2-d ``SparseMatrix``; an input given
    ``shape=(3, )`` must yield a 1-d ``SparseVector``. The dense views of
    both are compared with the raw fixtures.
    """
    s = SparseNDArray(s1_data)
    assert s.ndim == 2
    assert isinstance(s, SparseMatrix)
    assertArrayEqual(s.toarray(), s1_data.A)
    assertArrayEqual(s.todense(), s1_data.A)

    v = SparseNDArray(v1, shape=(3, ))
    # Check the vector's rank; the original asserted only that the *matrix*
    # ``s.ndim`` was truthy, which says nothing about ``v``.
    assert v.ndim == 1
    assert isinstance(v, SparseVector)
    assert v.shape == (3, )
    assertArrayEqual(v.todense(), v1_data)
    assertArrayEqual(v.toarray(), v1_data)
    assertArrayEqual(v, v1_data)
コード例 #19
0
    def testSparseCreation(self):
        """Check what ``SparseNDArray`` builds for matrix and vector inputs.

        A matrix input must yield a 2-d ``SparseMatrix``; an input given
        ``shape=(3,)`` must yield a 1-d ``SparseVector``. The dense views of
        both are compared with the raw fixtures.
        """
        s = SparseNDArray(self.s1)
        self.assertEqual(s.ndim, 2)
        self.assertIsInstance(s, SparseMatrix)
        self.assertArrayEqual(s.toarray(), self.s1.A)
        self.assertArrayEqual(s.todense(), self.s1.A)

        v = SparseNDArray(self.v1, shape=(3,))
        # ``assertTrue(s.ndim, 1)`` treated ``1`` as the failure message and
        # inspected the matrix; assert the vector's rank explicitly instead.
        self.assertEqual(v.ndim, 1)
        self.assertIsInstance(v, SparseVector)
        self.assertEqual(v.shape, (3,))
        self.assertArrayEqual(v.todense(), self.v1_data)
        self.assertArrayEqual(v.toarray(), self.v1_data)
        self.assertArrayEqual(v, self.v1_data)
コード例 #20
0
ファイル: test_sparse.py プロジェクト: qinxuye/mars
def test_sparse_unary():
    """Compare each unary function in ``mls`` with its ``numpy`` namesake.

    For the matrix, the expected result is a CSR matrix that keeps the
    sparsity pattern of ``s1_data`` and stores the NumPy function applied to
    its stored values. For the vector, the expected result is the NumPy
    function applied to ``v1_data``.
    """
    s1 = SparseNDArray(s1_data)
    v = SparseNDArray(v1, shape=(3, ))

    for method in ('negative', 'positive', 'absolute', 'abs', 'fabs', 'rint',
                   'sign', 'conj', 'exp', 'exp2', 'log', 'log2', 'log10',
                   'expm1', 'log1p', 'sqrt', 'square', 'cbrt', 'reciprocal',
                   'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan',
                   'arcsinh', 'arccosh', 'arctanh', 'deg2rad', 'rad2deg',
                   'angle', 'isnan', 'isinf', 'signbit', 'sinc', 'isreal',
                   'isfinite'):
        lm, rm = getattr(mls, method), getattr(np, method)
        # Build the expectation from the raw ``s1_data`` fixture so it does
        # not depend on attributes of the wrapper being tested.
        r = sps.csr_matrix(
            (rm(s1_data.data), s1_data.indices, s1_data.indptr),
            s1_data.shape)
        assertArrayEqual(lm(s1), r)
        assertArrayEqual(lm(v), rm(v1_data))
コード例 #21
0
    def testSparseAdd(self):
        """Check ``+`` on wrapped matrices and vectors against the raw fixtures."""
        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        # matrix operands in every order exercised by the test
        check(lhs + rhs, self.s1 + self.s2)
        check(lhs + self.d1, self.s1 + self.d1)
        check(self.d1 + lhs, self.d1 + self.s1)
        # scalar on either side, compared with the densified fixture plus one
        check(lhs + 1, self.s1.toarray() + 1)
        check(1 + lhs, self.s1.toarray() + 1)

        # sparse vector operands
        vec = SparseNDArray(self.v1, shape=(3,))
        check(vec + vec, self.v1_data + self.v1_data)
        check(vec + self.d1, self.v1_data + self.d1)
        check(self.d1 + vec, self.d1 + self.v1_data)
コード例 #22
0
    def testSparseSubtract(self):
        """Check ``-`` on wrapped matrices and vectors against the raw fixtures."""
        lhs = SparseNDArray(self.s1)
        rhs = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        # matrix operands in every order exercised by the test
        check(lhs - rhs, self.s1 - self.s2)
        check(lhs - self.d1, self.s1 - self.d1)
        check(self.d1 - lhs, self.d1 - self.s1)
        # scalar on either side, compared with the densified fixture
        check(lhs - 1, self.s1.toarray() - 1)
        check(1 - lhs, 1 - self.s1.toarray())

        # sparse vector operands
        vec = SparseNDArray(self.v1, shape=(3,))
        check(vec - vec, self.v1_data - self.v1_data)
        check(vec - self.d1, self.v1_data - self.d1)
        check(self.d1 - vec, self.d1 - self.v1_data)
コード例 #23
0
ファイル: test_sparse.py プロジェクト: wycharry/mars-1
    def testSparseDot(self):
        """Check ``dot`` and ``tensordot`` on wrapped matrices against ``dot``
        on the raw fixtures."""
        mat_a = SparseNDArray(self.s1)
        mat_b = SparseNDArray(self.s2)
        check = self.assertArrayEqual

        # products through ``dot``
        check(mls.dot(mat_a, mat_b.T), self.s1.dot(self.s2.T))
        check(mat_a.dot(self.d1), self.s1.dot(self.d1))
        check(self.d1.dot(mat_a.T),
              self.d1.dot(self.s1.T.toarray()))

        # the same products expressed through ``tensordot``
        check(mls.tensordot(mat_a, mat_b.T, axes=(1, 0)),
              self.s1.dot(self.s2.T))
        check(mls.tensordot(mat_a, self.d1, axes=(1, -1)),
              self.s1.dot(self.d1))
        check(mls.tensordot(self.d1, mat_a.T, axes=(0, 0)),
              self.d1.dot(self.s1.T.toarray()))
コード例 #24
0
ファイル: test_sparse.py プロジェクト: qinxuye/mars
def test_sparse_multiply():
    """Check ``*`` on ``SparseNDArray`` matrices and vectors.

    Matrix results are compared with ``multiply`` / ``*`` on the raw
    ``s1_data`` / ``s2_data`` fixtures; vector results with the same
    product on ``v1_data`` or on the stored values of ``v1``.
    """
    s1 = SparseNDArray(s1_data)
    s2 = SparseNDArray(s2_data)

    assertArrayEqual(s1 * s2, s1_data.multiply(s2_data))
    assertArrayEqual(s1 * d1, s1_data.multiply(d1))
    assertArrayEqual(d1 * s1, s1_data.multiply(d1))
    # Scalar products are checked against the raw fixture; comparing
    # ``s1 * 2`` with ``s1 * 2`` could never fail.
    assertArrayEqual(s1 * 2, s1_data * 2)
    assertArrayEqual(2 * s1, s1_data * 2)

    # test sparse vector
    v = SparseNDArray(v1, shape=(3, ))
    assertArrayEqual(v * v, v1_data * v1_data)
    assertArrayEqual(v * d1, v1_data * d1)
    assertArrayEqual(d1 * v, d1 * v1_data)
    r = sps.csr_matrix(((v1.data * 1), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(v * 1, r.toarray().reshape(3))
    r = sps.csr_matrix(((1 * v1.data), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(1 * v, r.toarray().reshape(3))
コード例 #25
0
ファイル: test_sparse.py プロジェクト: qinxuye/mars
def test_sparse_power():
    """Check ``**`` on ``SparseNDArray`` matrices and vectors.

    Expected values are computed from the raw ``s1_data`` / ``s2_data`` /
    ``v1`` / ``v1_data`` fixtures, so they do not rely on the wrapper's own
    ``toarray``.
    """
    s1 = SparseNDArray(s1_data)
    s2 = SparseNDArray(s2_data)

    # Densify the raw fixtures, not the wrappers being tested.
    assertArrayEqual(s1**s2, s1_data.toarray()**s2_data.toarray())
    assertArrayEqual(s1**d1, s1_data.toarray()**d1)
    assertArrayEqual(d1**s1, d1**s1_data.toarray())
    assertArrayEqual(s1**2, s1_data.power(2))
    assertArrayEqual(2**s1, 2**s1_data.toarray())

    # test sparse vector
    v = SparseNDArray(v1, shape=(3, ))
    assertArrayEqual(v**v, v1_data**v1_data)
    assertArrayEqual(v**d1, v1_data**d1)
    assertArrayEqual(d1**v, d1**v1_data)
    r = sps.csr_matrix(((v1.data**1), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(v**1, r.toarray().reshape(3))
    r = sps.csr_matrix(((1**v1.data), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(1**v, r.toarray().reshape(3))
コード例 #26
0
ファイル: test_sparse.py プロジェクト: qinxuye/mars
def test_sparse_mod():
    """Check ``%`` on ``SparseNDArray`` matrices and vectors.

    Expected values are computed from the raw ``s1_data`` / ``s2_data`` /
    ``v1`` / ``v1_data`` fixtures, so they do not rely on the wrapper's own
    ``toarray``.
    """
    s1 = SparseNDArray(s1_data)
    s2 = SparseNDArray(s2_data)

    # Densify the raw fixtures, not the wrappers being tested.
    assertArrayEqual(s1 % s2, s1_data.toarray() % s2_data.toarray())
    assertArrayEqual(s1 % d1, s1_data.toarray() % d1)
    assertArrayEqual(d1 % s1, d1 % s1_data.toarray())
    assertArrayEqual(s1 % 2, s1_data.toarray() % 2)
    assertArrayEqual(2 % s1, 2 % s1_data.toarray())

    # test sparse vector
    v = SparseNDArray(v1, shape=(3, ))
    assertArrayEqual(v % v, v1_data % v1_data)
    assertArrayEqual(v % d1, v1_data % d1)
    assertArrayEqual(d1 % v, d1 % v1_data)
    r = sps.csr_matrix(((v1.data % 1), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(v % 1, r.toarray().reshape(3))
    r = sps.csr_matrix(((1 % v1.data), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(1 % v, r.toarray().reshape(3))
コード例 #27
0
    def execute(cls, ctx, op):
        """Read the parquet files in ``op.paths`` and store the assembled
        array under ``ctx[chunk.key]``, where ``chunk`` is ``op.outputs[0]``.

        Every file is loaded into a pandas DataFrame and the frames are
        concatenated. The columns named in ``op.dim_cols`` are used as
        per-dimension coordinates and ``op.value_col`` as the values. With
        ``op.sparse`` set the result is a ``SparseNDArray`` wrapping a CSR
        matrix; otherwise it is a NumPy array of ``chunk.shape``.
        """
        import pyarrow.parquet as pq
        import pandas as pd
        import scipy.sparse as sps
        from mars.lib.sparse import SparseNDArray
        from ..io import open as fs_open

        dfs = []
        # storage options are stored on the op as a JSON string
        storage_opts = json.loads(op.storage_options)
        for p in op.paths:
            with fs_open(p, 'rb', **storage_opts) as inp_file:
                # read the whole file into memory, then parse it as parquet
                f = inp_file.read()
                dfs.append(pq.read_table(BytesIO(f)).to_pandas())

        chunk = op.outputs[0]
        if op.sparse and len(dfs) == 0:
            # No input files: emit an empty sparse array of the chunk's shape.
            if len(chunk.shape) == 1:
                # 1-d output is backed by an (n, 1) CSR matrix
                csr_array = sps.csr_matrix((chunk.shape[0], 1))
                ctx[chunk.key] = SparseNDArray(csr_array, shape=chunk.shape)
            else:
                csr_array = sps.csr_matrix(chunk.shape)
                ctx[chunk.key] = SparseNDArray(csr_array)
            return

        # NOTE(review): when ``op.sparse`` is false and no file was read,
        # ``pd.concat`` is given an empty list - confirm this cannot happen.
        df_merged = pd.concat(dfs, ignore_index=True)
        dim_arrays = [df_merged[col] for col in op.dim_cols]
        value_array = df_merged[op.value_col].astype(chunk.dtype)
        # drop the merged frame once the needed columns are extracted
        del df_merged

        if op.sparse:
            if len(chunk.shape) == 1:
                # add an all-zero column coordinate so the 1-d data fits
                # the (n, 1) matrix built below
                dim_arrays.append(np.zeros((len(dim_arrays[0]))))
                csr_array = sps.csr_matrix((value_array, tuple(dim_arrays)),
                                           shape=(chunk.shape[0], 1))
            else:
                csr_array = sps.csr_matrix((value_array, tuple(dim_arrays)),
                                           shape=chunk.shape)
            del dim_arrays, value_array
            ctx[chunk.key] = SparseNDArray(csr_array, shape=chunk.shape)
        else:
            # NOTE(review): ``np.empty`` leaves cells not named by the
            # coordinates uninitialized - presumably the input covers every
            # cell of the chunk; confirm.
            arr = np.empty(chunk.shape, dtype=value_array.dtype)
            arr[tuple(dim_arrays)] = value_array
            ctx[chunk.key] = arr
コード例 #28
0
    def testSparseUnary(self):
        """Compare each unary function in ``mls`` with its ``numpy`` namesake
        applied to the densified ``self.s1`` fixture."""
        mat = SparseNDArray(self.s1)

        names = ('negative', 'positive', 'absolute', 'abs', 'fabs', 'rint',
                 'sign', 'conj', 'exp', 'exp2', 'log', 'log2', 'log10',
                 'expm1', 'log1p', 'sqrt', 'square', 'cbrt', 'reciprocal',
                 'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan',
                 'arcsinh', 'arccosh', 'arctanh', 'deg2rad', 'rad2deg')
        for name in names:
            sparse_fn = getattr(mls, name)
            numpy_fn = getattr(np, name)
            self.assertArrayEqual(sparse_fn(mat), numpy_fn(self.s1.toarray()))
コード例 #29
0
ファイル: test_sparse.py プロジェクト: qinxuye/mars
def test_sparse_floor_divide():
    """Check ``//`` on ``SparseNDArray`` matrices and vectors.

    Expected values are computed from the raw ``s1_data`` / ``s2_data`` /
    ``v1`` / ``v1_data`` fixtures, so they do not rely on the wrapper's own
    ``toarray``.
    """
    s1 = SparseNDArray(s1_data)
    s2 = SparseNDArray(s2_data)

    # Densify the raw fixtures, not the wrappers being tested.
    assertArrayEqual(s1 // s2, s1_data.toarray() // s2_data.toarray())
    assertArrayEqual(s1 // d1, s1_data.toarray() // d1)
    assertArrayEqual(d1 // s1, d1 // s1_data.toarray())
    assertArrayEqual(s1 // 2, s1_data.toarray() // 2)
    assertArrayEqual(2 // s1, 2 // s1_data.toarray())

    # test sparse vector
    v = SparseNDArray(v1, shape=(3, ))
    assertArrayEqual(v // v, v1_data // v1_data)
    assertArrayEqual(v // d1, v1_data // d1)
    assertArrayEqual(d1 // v, d1 // v1_data)
    r = sps.csr_matrix(((v1.data // 1), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(v // 1, r.toarray().reshape(3))
    r = sps.csr_matrix(((1 // v1.data), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(1 // v, r.toarray().reshape(3))
コード例 #30
0
ファイル: test_libs.py プロジェクト: deka108/mars
async def test_base_operations(storage_context):
    """Exercise put/get/object_info, listing, deletion and the writer/reader
    API of the storage backend supplied by ``storage_context``.

    Objects stored: a NumPy array, a pandas DataFrame and a sparse matrix,
    followed by a serialized array written in two parts and read back.
    """
    storage = storage_context

    # round-trip a NumPy array
    data1 = np.random.rand(10, 10)
    put_info1 = await storage.put(data1)
    get_data1 = await storage.get(put_info1.object_id)
    np.testing.assert_array_equal(data1, get_data1)

    info1 = await storage.object_info(put_info1.object_id)
    # FIXME: remove os check when size issue fixed
    assert info1.size == put_info1.size or not sys.platform.startswith('linux')

    # round-trip a DataFrame with integer, string and float columns
    data2 = pd.DataFrame(
        {
            'col1': np.arange(10),
            'col2': [f'str{i}' for i in range(10)],
            'col3': np.random.rand(10)
        }, )
    put_info2 = await storage.put(data2)
    get_data2 = await storage.get(put_info2.object_id)
    pd.testing.assert_frame_equal(data2, get_data2)

    info2 = await storage.object_info(put_info2.object_id)
    # FIXME: remove os check when size issue fixed
    assert info2.size == put_info2.size or not sys.platform.startswith('linux')

    # FIXME: remove when list functionality is ready for vineyard.
    if not isinstance(storage,
                      (VineyardStorage, SharedMemoryStorage, RayStorage)):
        # exactly the two objects put above are expected at this point
        num = len(await storage.list())
        assert num == 2
        await storage.delete(info2.object_id)

    # test SparseMatrix
    s1 = sps.csr_matrix([[1, 0, 1], [0, 0, 1]])
    s = SparseNDArray(s1)
    put_info3 = await storage.put(s)
    get_data3 = await storage.get(put_info3.object_id)
    assert isinstance(get_data3, SparseMatrix)
    np.testing.assert_array_equal(get_data3.toarray(), s1.A)
    np.testing.assert_array_equal(get_data3.todense(), s1.A)

    # test writer and reader
    t = np.random.random(10)
    b = dataserializer.dumps(t)
    async with await storage.open_writer(size=len(b)) as writer:
        # write the payload in two halves
        split = len(b) // 2
        await writer.write(b[:split])
        await writer.write(b[split:])

    # the writer's object_id is used after its context has exited
    async with await storage.open_reader(writer.object_id) as reader:
        content = await reader.read()
        t2 = dataserializer.loads(content)

    np.testing.assert_array_equal(t, t2)