예제 #1
0
    def test_auto_dtype(self):
        """DictRDD built without ``dtype`` reports per-column types.

        The three columns come from an array, a tuple of ints and a list of
        ints. The same checks run once with positional columns and once
        with the column names ``'x'``, ``'y'`` and ``'z'``.
        """
        arr_rdd = self.sc.parallelize(np.arange(80).reshape((40, 2)), 4)
        tup_rdd = self.sc.parallelize(tuple(range(40)), 4)
        lst_rdd = self.sc.parallelize(list(range(40)), 4)

        head = (
            np.arange(20).reshape(10, 2),
            tuple(range(10)),
            list(range(10)),
        )
        # The test expects ``tuple`` for the list-backed column as well.
        inferred = (np.ndarray, tuple, tuple)

        positional = DictRDD([arr_rdd, tup_rdd, lst_rdd])
        assert_tuple_equal(positional.first(), head)
        assert_equal(positional.dtype, inferred)
        # Without column names the dtype mapping is keyed by position.
        assert_true(check_rdd_dtype(positional, dict(enumerate(inferred))))

        names = ('x', 'y', 'z')
        named = DictRDD([arr_rdd, tup_rdd, lst_rdd], columns=names)
        assert_tuple_equal(named.first(), head)
        assert_equal(named.dtype, inferred)
        assert_true(check_rdd_dtype(named, dict(zip(names, inferred))))
예제 #2
0
    def test_creation_from_blocked_rdds(self):
        """Create a DictRDD from columns already wrapped as ArrayRDD/BlockRDD.

        The first element is compared for three constructions: defaults,
        named columns, and an explicit ``dtype`` whose third entry is
        ``list``.
        """
        matrix = np.arange(80).reshape((40, 2))
        vector = np.arange(40)
        sequence = range(40)
        matrix_rdd = ArrayRDD(self.sc.parallelize(matrix, 4))
        vector_rdd = ArrayRDD(self.sc.parallelize(vector, 4))
        sequence_rdd = BlockRDD(self.sc.parallelize(sequence, 4), dtype=list)

        head = (np.arange(20).reshape(10, 2), np.arange(10), range(10))

        # ``**{}`` is the same call as passing no keyword arguments.
        for options in ({}, {'columns': ('x', 'y', 'z')}):
            built = DictRDD([matrix_rdd, vector_rdd, sequence_rdd], **options)
            assert_tuple_equal(built.first(), head)

        typed = DictRDD([matrix_rdd, vector_rdd, sequence_rdd],
                        dtype=(None, None, list))
        got = typed.first()
        assert_tuple_equal(got, head)
        # The third column must come back as an actual list.
        assert_is_instance(got[2], list)
예제 #3
0
    def test_creation_from_zipped_rdd(self):
        """Create a DictRDD from a single RDD produced by zipping two RDDs.

        The first element is checked for the default construction, for
        named columns, and for an explicit ``dtype`` that requests a
        ``list`` for the second column.
        """
        rows = self.sc.parallelize(np.arange(80).reshape((40, 2)), 4)
        numbers = self.sc.parallelize(range(40), 4)
        pairs = rows.zip(numbers)

        head = (np.arange(20).reshape(10, 2), tuple(range(10)))

        # ``**{}`` is the same call as passing no keyword arguments.
        for options in ({}, {'columns': ('x', 'y')}):
            built = DictRDD(pairs, **options)
            assert_tuple_equal(built.first(), head)

        listed = DictRDD(pairs, dtype=(np.ndarray, list)).first()
        assert_tuple_equal(listed, head)
        assert_is_instance(listed[1], list)
예제 #4
0
    def test_creation_from_zipped_rdd(self):
        """Wrap a zipped two-column RDD in a DictRDD and inspect ``first()``.

        Covers construction with defaults, with column names ``'x'`` and
        ``'y'``, and with ``dtype=(np.ndarray, list)``; in the last case the
        second entry of the first element must be a ``list``.
        """
        n_rows, n_slices = 40, 4
        matrix = np.arange(2 * n_rows).reshape((n_rows, 2))
        left = self.sc.parallelize(matrix, n_slices)
        right = self.sc.parallelize(range(n_rows), n_slices)
        zipped = left.zip(right)

        want = (np.arange(20).reshape(10, 2), tuple(range(10)))

        plain = DictRDD(zipped)
        assert_tuple_equal(plain.first(), want)

        named = DictRDD(zipped, columns=('x', 'y'))
        assert_tuple_equal(named.first(), want)

        typed = DictRDD(zipped, dtype=(np.ndarray, list))
        got = typed.first()
        assert_tuple_equal(got, want)
        assert_is_instance(got[1], list)
예제 #5
0
    def test_creation_from_blocked_rdds(self):
        """Combine two ArrayRDDs and a list-typed BlockRDD into a DictRDD.

        ``first()`` is compared against the same expected tuple for the
        default construction, for named columns and for an explicit
        ``dtype``; the explicit-dtype result must carry a ``list`` in its
        third position.
        """
        arrays = (np.arange(80).reshape((40, 2)), np.arange(40))
        wrapped = [ArrayRDD(self.sc.parallelize(a, 4)) for a in arrays]
        wrapped.append(
            BlockRDD(self.sc.parallelize(list(range(40)), 4), dtype=list))

        want = (
            np.arange(20).reshape(10, 2),
            np.arange(10),
            list(range(10)),
        )

        # A fresh list is handed over each time, as separate literals would.
        plain = DictRDD(list(wrapped))
        assert_tuple_equal(plain.first(), want)

        named = DictRDD(list(wrapped), columns=('x', 'y', 'z'))
        assert_tuple_equal(named.first(), want)

        typed = DictRDD(list(wrapped), dtype=(None, None, list))
        got = typed.first()
        assert_tuple_equal(got, want)
        assert_is_instance(got[2], list)
예제 #6
0
    def test_creation_from_rdds(self):
        """Create a DictRDD from a list of three plain RDDs.

        The first element is verified for the default construction, for
        named columns, and for ``dtype=(np.ndarray, np.ndarray, list)``,
        where the third entry must additionally be a ``list`` instance.
        """
        n_slices = 4
        matrix_rdd = self.sc.parallelize(
            np.arange(80).reshape((40, 2)), n_slices)
        vector_rdd = self.sc.parallelize(np.arange(40), n_slices)
        list_rdd = self.sc.parallelize(list(range(40)), n_slices)

        head = (np.arange(20).reshape(10, 2), np.arange(10), list(range(10)))

        # ``**{}`` is the same call as passing no keyword arguments.
        for options in ({}, {'columns': ('x', 'y', 'z')}):
            built = DictRDD([matrix_rdd, vector_rdd, list_rdd], **options)
            assert_tuple_equal(built.first(), head)

        column_types = (np.ndarray, np.ndarray, list)
        typed = DictRDD([matrix_rdd, vector_rdd, list_rdd], dtype=column_types)
        got = typed.first()
        assert_tuple_equal(got, head)
        assert_is_instance(got[2], list)
예제 #7
0
    def test_auto_dtype(self):
        """Verify the dtype a DictRDD reports when none is passed in.

        Runs the same three assertions (first element, ``dtype`` attribute,
        ``check_rdd_dtype``) for a DictRDD with positional columns and for
        one whose columns are named ``'x'``, ``'y'``, ``'z'``.
        """
        n_rows = 40
        columns = (
            np.arange(2 * n_rows).reshape((n_rows, 2)),
            tuple(range(n_rows)),
            list(range(n_rows)),
        )
        first_rdd, second_rdd, third_rdd = [
            self.sc.parallelize(column, 4) for column in columns
        ]

        want = (np.arange(20).reshape(10, 2), tuple(range(10)),
                list(range(10)))
        want_dtype = (np.ndarray, tuple, tuple)

        # Each case pairs constructor keywords with the keys used to
        # address the columns in the dtype check.
        cases = (
            ({}, (0, 1, 2)),
            ({'columns': ('x', 'y', 'z')}, ('x', 'y', 'z')),
        )
        for options, keys in cases:
            built = DictRDD([first_rdd, second_rdd, third_rdd], **options)
            assert_tuple_equal(built.first(), want)
            assert_equal(built.dtype, want_dtype)
            assert_true(check_rdd_dtype(built, dict(zip(keys, want_dtype))))
예제 #8
0
    def test_get_single_tuple(self):
        """Select single elements of a DictRDD by integer index.

        A DictRDD is built with ``bsize=5`` from a zipped (40, 2) array and
        a 40-element vector. Indices 0, 3 and 7 are checked, and the test
        expects -5 and -1 to select the same elements as 3 and 7.
        """
        matrix, vector = np.arange(80).reshape((40, 2)), np.arange(40)
        zipped = self.sc.parallelize(matrix, 2).zip(
            self.sc.parallelize(vector, 2))
        blocks = DictRDD(zipped, bsize=5)

        # Results are collected before any assertion runs.
        want = np.arange(0, 10).reshape((5, 2)), np.arange(5)
        results = [blocks.first(), blocks[0].first(), blocks[0].first()]
        for got in results:
            assert_tuple_equal(got, want)

        want = np.arange(30, 40).reshape((5, 2)), np.arange(15, 20)
        results = [blocks[3].first(), blocks[3].first(), blocks[-5].first()]
        for got in results:
            assert_tuple_equal(got, want)

        want = np.arange(70, 80).reshape((5, 2)), np.arange(35, 40)
        results = [blocks[7].first(), blocks[7].first(), blocks[-1].first()]
        for got in results:
            assert_tuple_equal(got, want)
예제 #9
0
    def test_get_single_tuple(self):
        """Check integer indexing of a DictRDD built with ``bsize=5``.

        For indices 0, 3 and 7 (plus the negative indices -5 and -1) the
        first element of the indexed result is compared with the matching
        five-row slice of the source arrays.
        """
        def verify(expected, results):
            # Compare every collected result with the same expected tuple.
            for item in results:
                assert_tuple_equal(item, expected)

        source_x = np.arange(80).reshape((40, 2))
        source_y = np.arange(40)
        left = self.sc.parallelize(source_x, 2)
        right = self.sc.parallelize(source_y, 2)
        dict_rdd = DictRDD(left.zip(right), bsize=5)

        verify(
            (np.arange(0, 10).reshape((5, 2)), np.arange(5)),
            [dict_rdd.first(), dict_rdd[0].first(), dict_rdd[0].first()],
        )
        verify(
            (np.arange(30, 40).reshape((5, 2)), np.arange(15, 20)),
            [dict_rdd[3].first(), dict_rdd[3].first(), dict_rdd[-5].first()],
        )
        verify(
            (np.arange(70, 80).reshape((5, 2)), np.arange(35, 40)),
            [dict_rdd[7].first(), dict_rdd[7].first(), dict_rdd[-1].first()],
        )