예제 #1
0
    def test_transform(self):
        # Check DictRDD.transform against results computed locally from the
        # collected tuples, with and without the ``column`` argument.
        first = np.arange(400).reshape((100, 4))
        second = np.arange(200).reshape((100, 2))
        zipped = self.sc.parallelize(first, 4).zip(
            self.sc.parallelize(second, 4))

        X = DictRDD(zipped, bsize=5)

        # Each case: (local computation on one collected tuple,
        #             function handed to transform, keyword arguments).
        cases = (
            # No ``column``: the function takes both columns and returns both.
            (lambda t: (t[0], t[1] ** 2),
             lambda a, b: (a, b ** 2), {}),
            # Single column index: expected to change only that column.
            (lambda t: (t[0], t[1] ** 2),
             lambda x: x ** 2, {'column': 1}),
            (lambda t: (t[0] ** 2, t[1]),
             lambda x: x ** 2, {'column': 0}),
            # List of columns: arguments and results follow the listed order,
            # so both orderings are expected to give the same output.
            (lambda t: (t[0] ** 2, t[1] ** 0.5),
             lambda a, b: (a ** 2, b ** 0.5), {'column': [0, 1]}),
            (lambda t: (t[0] ** 2, t[1] ** 0.5),
             lambda b, a: (b ** 0.5, a ** 2), {'column': [1, 0]}),
        )

        for local_fn, rdd_fn, kwargs in cases:
            expected = [local_fn(t) for t in X.collect()]
            transformed = X.transform(rdd_fn, **kwargs)
            assert_multiple_tuples_equal(expected, transformed.collect())
예제 #2
0
    def test_transform(self):
        # Verify DictRDD.transform for the default call and for ``column``
        # given as a single index or as a list of indices. Expected values
        # are computed locally from the collected tuples.
        left = self.sc.parallelize(np.arange(400).reshape((100, 4)), 4)
        right = self.sc.parallelize(np.arange(200).reshape((100, 2)), 4)

        X = DictRDD(left.zip(right), bsize=5)

        # Default call: the function receives both columns.
        expected = [(a, b ** 2) for a, b in X.collect()]
        actual = X.transform(lambda a, b: (a, b ** 2)).collect()
        assert_multiple_tuples_equal(expected, actual)

        # column=1: only the second column is expected to be squared.
        expected = [(a, b ** 2) for a, b in X.collect()]
        actual = X.transform(lambda x: x ** 2, column=1).collect()
        assert_multiple_tuples_equal(expected, actual)

        # column=0: only the first column is expected to be squared.
        expected = [(a ** 2, b) for a, b in X.collect()]
        actual = X.transform(lambda x: x ** 2, column=0).collect()
        assert_multiple_tuples_equal(expected, actual)

        # column=[0, 1]: both columns, passed in natural order.
        expected = [(a ** 2, b ** 0.5) for a, b in X.collect()]
        result = X.transform(lambda a, b: (a ** 2, b ** 0.5),
                             column=[0, 1])
        assert_multiple_tuples_equal(expected, result.collect())

        # column=[1, 0]: both columns, passed in reversed order; the
        # expected output is the same as for the natural order.
        expected = [(a ** 2, b ** 0.5) for a, b in X.collect()]
        result = X.transform(lambda b, a: (b ** 0.5, a ** 2),
                             column=[1, 0])
        assert_multiple_tuples_equal(expected, result.collect())
예제 #3
0
    def test_get_multiple_items(self):
        # Index a DictRDD with a (block selection, column selection) pair
        # and compare the collected result with locally built arrays.
        col0_data = np.arange(80).reshape((40, 2))
        col1_data = np.arange(40)
        zipped = self.sc.parallelize(col0_data, 2).zip(
            self.sc.parallelize(col1_data, 2))
        z = DictRDD(zipped, bsize=5)

        # Expected content of the first two blocks, five rows each.
        first = (np.arange(0, 10).reshape((5, 2)), np.arange(0, 5))
        second = (np.arange(10, 20).reshape((5, 2)), np.arange(5, 10))

        # A scalar column index is expected to yield bare arrays.
        assert_array_equal(z[:2, 1].collect(), [first[1], second[1]])
        assert_array_equal(z[[0, 1], 0].collect(), [first[0], second[0]])

        # A list or slice column index is expected to yield 1-tuples.
        only_col1 = [(first[1], ), (second[1], )]
        assert_multiple_tuples_equal(z[[0, 1], [1]].collect(), only_col1)
        assert_multiple_tuples_equal(z[[0, 1], -1:].collect(), only_col1)

        # Reversing both the block order and the column order.
        swapped = [second[::-1], first[::-1]]
        assert_multiple_tuples_equal(z[[1, 0], [1, 0]].collect(), swapped)
예제 #4
0
    def test_get_multiple_items(self):
        # Select blocks and columns of a DictRDD at the same time and check
        # the collected output against arrays constructed locally.
        matrix = np.arange(80).reshape((40, 2))
        vector = np.arange(40)
        matrix_rdd = self.sc.parallelize(matrix, 2)
        vector_rdd = self.sc.parallelize(vector, 2)
        dict_rdd = DictRDD(matrix_rdd.zip(vector_rdd), bsize=5)

        # Blocks 0 and 1 as (column 0, column 1) pairs of five rows each.
        blocks = [(np.arange(10 * i, 10 * i + 10).reshape((5, 2)),
                   np.arange(5 * i, 5 * i + 5))
                  for i in range(2)]
        col0_arrays = [blk[0] for blk in blocks]
        col1_arrays = [blk[1] for blk in blocks]

        # Integer column index: plain arrays are expected back.
        assert_array_equal(dict_rdd[:2, 1].collect(), col1_arrays)
        assert_array_equal(dict_rdd[[0, 1], 0].collect(), col0_arrays)

        # List / slice column index: 1-tuples are expected back.
        assert_multiple_tuples_equal(dict_rdd[[0, 1], [1]].collect(),
                                     [(arr,) for arr in col1_arrays])
        assert_multiple_tuples_equal(dict_rdd[[0, 1], -1:].collect(),
                                     [(arr,) for arr in col1_arrays])

        # Blocks and columns both requested in reversed order.
        assert_multiple_tuples_equal(
            dict_rdd[[1, 0], [1, 0]].collect(),
            [blk[::-1] for blk in reversed(blocks)])
예제 #5
0
    def test_get_multiple_tuples(self):
        # Select whole blocks of a DictRDD using slices and index lists and
        # compare the collected tuples with locally built blocks.
        col0_data = np.arange(80).reshape((40, 2))
        col1_data = np.arange(40)
        zipped = self.sc.parallelize(col0_data, 2).zip(
            self.sc.parallelize(col1_data, 2))
        z = DictRDD(zipped, bsize=5)

        def block(i):
            # Expected content of block ``i``: five rows from each column.
            return (np.arange(10 * i, 10 * i + 10).reshape((5, 2)),
                    np.arange(5 * i, 5 * i + 5))

        # np.s_[...] returns exactly the key object that z[...] would get,
        # so equivalent selections can share one loop.
        head = [block(0), block(1)]
        for key in (np.s_[:2], np.s_[:2, :], np.s_[[0, 1]], np.s_[[0, 1], :]):
            assert_multiple_tuples_equal(z[key].collect(), head)
        assert_multiple_tuples_equal(z[[1, 0]].collect(), head[::-1])

        # The last three blocks, addressed from the end or by index.
        tail = [block(5), block(6), block(7)]
        for key in (np.s_[-3:], np.s_[-3:, :],
                    np.s_[[5, 6, 7]], np.s_[[5, 6, 7], :]):
            assert_multiple_tuples_equal(z[key].collect(), tail)
        for key in (np.s_[[7, 6, 5]], np.s_[[7, 6, 5], :]):
            assert_multiple_tuples_equal(z[key].collect(), tail[::-1])

        # An arbitrary permutation of the block indices.
        assert_multiple_tuples_equal(z[[5, 7, 6]].collect(),
                                     [tail[0], tail[2], tail[1]])
예제 #6
0
    def test_get_multiple_tuples(self):
        # Whole-block selection on a DictRDD: slices and index lists, with
        # and without an explicit "all columns" slice, must return the
        # expected (column 0, column 1) tuples in the requested order.
        data_a = np.arange(80).reshape((40, 2))
        data_b = np.arange(40)
        rdd_a = self.sc.parallelize(data_a, 2)
        rdd_b = self.sc.parallelize(data_b, 2)
        blocked = DictRDD(rdd_a.zip(rdd_b), bsize=5)

        # First two blocks: rows 0-4 and 5-9 of the source arrays.
        leading = [(data_a[lo:lo + 5], data_b[lo:lo + 5]) for lo in (0, 5)]
        assert_multiple_tuples_equal(blocked[:2].collect(), leading)
        assert_multiple_tuples_equal(blocked[:2, :].collect(), leading)
        assert_multiple_tuples_equal(blocked[[0, 1]].collect(), leading)
        assert_multiple_tuples_equal(blocked[[0, 1], :].collect(), leading)
        assert_multiple_tuples_equal(blocked[[1, 0]].collect(),
                                     list(reversed(leading)))

        # Last three blocks: rows 25-29, 30-34 and 35-39.
        trailing = [(data_a[lo:lo + 5], data_b[lo:lo + 5])
                    for lo in (25, 30, 35)]
        assert_multiple_tuples_equal(blocked[-3:].collect(), trailing)
        assert_multiple_tuples_equal(blocked[-3:, :].collect(), trailing)
        assert_multiple_tuples_equal(blocked[[5, 6, 7]].collect(), trailing)
        assert_multiple_tuples_equal(blocked[[5, 6, 7], :].collect(),
                                     trailing)
        assert_multiple_tuples_equal(blocked[[7, 6, 5]].collect(),
                                     list(reversed(trailing)))
        assert_multiple_tuples_equal(blocked[[7, 6, 5], :].collect(),
                                     list(reversed(trailing)))

        # An arbitrary permutation of the block indices.
        b5, b6, b7 = trailing
        assert_multiple_tuples_equal(blocked[[5, 7, 6]].collect(),
                                     [b5, b7, b6])