def test_partition_number(self):
    """Check ``BlockRDD.partitions`` for several generated RDDs.

    The expected value is the second argument handed to ``self.generate``
    (presumably the number of partitions -- confirm against the helper).
    """
    # (element count, partition argument, extra BlockRDD keyword arguments)
    scenarios = [
        (1000, 5, {'bsize': 50}),
        (621, 3, {'bsize': 45}),
        (100, 10, {}),
    ]
    for n_items, n_parts, options in scenarios:
        source = self.generate(n_items, n_parts)
        assert_equal(BlockRDD(source, **options).partitions, n_parts)
def test_unblock(self):
    """Check that ``unblock()`` gives back the integers 0..999 in order.

    Exercised once with the default block dtype and once with
    ``dtype=tuple``.
    """
    # Build the expectation as a real list: on Python 3 ``range`` is a lazy
    # sequence object that never compares equal to a list, so comparing the
    # collected result against a bare ``range(1000)`` always fails there.
    expected = list(range(1000))

    blocked = BlockRDD(self.generate(1000, 5))
    unblocked = blocked.unblock()
    # The source object must still be a BlockRDD after unblocking.
    assert_is_instance(blocked, BlockRDD)
    assert_equal(unblocked.collect(), expected)

    blocked = BlockRDD(self.generate(1000, 5), dtype=tuple)
    unblocked = blocked.unblock()
    assert_is_instance(blocked, BlockRDD)
    assert_equal(unblocked.collect(), expected)
def test_blocks_number(self):
    """Check ``BlockRDD.blocks`` for several sizes and ``bsize`` values."""
    # (arguments for self.generate, bsize, expected value of ``blocks``)
    cases = [
        ((1000,), 50, 20),
        ((621,), 45, 20),
        ((100,), 4, 30),
        ((79, 2), 9, 10),
        ((89, 2), 5, 18),
    ]
    for gen_args, block_size, n_blocks in cases:
        blocked = BlockRDD(self.generate(*gen_args), bsize=block_size)
        assert_equal(blocked.blocks, n_blocks)
def test_length(self):
    """Check that ``len()`` of a BlockRDD equals the size given to generate."""
    for size in (1000, 100, 79, 89, 62):
        wrapped = BlockRDD(self.generate(size))
        assert_equal(len(wrapped), size)
def test_dtypes(self):
    """Check the type of the first block for each supported ``dtype``."""
    source = self.generate()
    # (dtype passed to BlockRDD, type the first block must be an instance of)
    pairs = (
        (list, list),
        (tuple, tuple),
        (set, set),
        (np.array, np.ndarray),
    )
    for requested, block_type in pairs:
        first_block = BlockRDD(source, dtype=requested).first()
        assert_is_instance(first_block, block_type)
def test_tolist(self):
    """Check that ``tolist()`` returns the integers 0..999 as a flat list.

    Covers the default block dtype, ``tuple`` and ``np.array``.
    """
    everything = list(range(1000))
    # An empty dict means "construct with the default dtype".
    for options in ({}, {'dtype': tuple}, {'dtype': np.array}):
        blocked = BlockRDD(self.generate(1000, 5), **options)
        flattened = blocked.tolist()
        assert_is_instance(blocked, BlockRDD)
        assert_equal(flattened, everything)
def test_creation(self):
    """Check block contents and block lengths right after construction.

    First without ``bsize`` (ten tuples of ten consecutive integers are
    expected), then with ``bsize=4`` (block lengths 4, 4, 2 repeated).
    """
    source = self.generate()

    # Without an explicit block size.
    whole = BlockRDD(source)
    assert_is_instance(whole, BlockRDD)
    assert_equal(whole.first(), tuple(range(10)))
    grid = np.arange(100).reshape(10, 10)
    assert_equal(whole.collect(), [tuple(row) for row in grid])

    # With a block size of four elements.
    chunked = BlockRDD(source, bsize=4)
    assert_is_instance(chunked, BlockRDD)
    assert_equal(chunked.first(), tuple(range(4)))
    block_lengths = [len(block) for block in chunked.collect()]
    assert_equal(block_lengths, [4, 4, 2] * 10)
def test_unblock(self):
    """Check that ``unblock()`` collects to the integers 0..999 in order.

    Covers the default block dtype and ``dtype=tuple``.
    """
    # An empty dict means "construct with the default dtype".
    for options in ({}, {'dtype': tuple}):
        blocked = BlockRDD(self.generate(1000, 5), **options)
        flat_rdd = blocked.unblock()
        assert_is_instance(blocked, BlockRDD)
        assert_equal(flat_rdd.collect(), list(range(1000)))
def test_creation(self):
    """Check block contents and block lengths right after construction.

    First without ``bsize`` (ten blocks of ten consecutive integers are
    expected), then with ``bsize=4`` (block lengths 4, 4, 2 repeated).
    """
    # NOTE(review): the expectations below are lists (``collect()`` is
    # compared with a list of lists), i.e. this test assumes BlockRDD builds
    # list blocks by default -- confirm against the default ``dtype``.
    rdd = self.generate()

    blocked = BlockRDD(rdd)
    assert_is_instance(blocked, BlockRDD)
    # ``list(range(...))`` rather than a bare ``range``: on Python 3 a range
    # object never compares equal to a list, so the bare form always fails.
    assert_equal(blocked.first(), list(range(10)))
    assert_equal(blocked.collect(),
                 np.arange(100).reshape(10, 10).tolist())

    blocked = BlockRDD(rdd, bsize=4)
    assert_is_instance(blocked, BlockRDD)
    assert_equal(blocked.first(), list(range(4)))
    assert_equal([len(x) for x in blocked.collect()], [4, 4, 2] * 10)
def test_creation_from_blocked_rdds(self):
    """Check building a DictRDD from already-blocked RDDs.

    Two ArrayRDDs and one list-typed BlockRDD are combined three ways: with
    no extra arguments, with ``columns``, and with an explicit ``dtype``
    tuple. The first element must equal the same expected triple each time.
    """
    parallelize = self.sc.parallelize
    matrix_rdd = ArrayRDD(parallelize(np.arange(80).reshape((40, 2)), 4))
    vector_rdd = ArrayRDD(parallelize(np.arange(40), 4))
    list_rdd = BlockRDD(parallelize(list(range(40)), 4), dtype=list)
    parts = (matrix_rdd, vector_rdd, list_rdd)

    first_block = (
        np.arange(20).reshape(10, 2),
        np.arange(10),
        list(range(10)),
    )

    # A fresh list is handed to every constructor call.
    plain = DictRDD(list(parts))
    assert_tuple_equal(plain.first(), first_block)

    named = DictRDD(list(parts), columns=('x', 'y', 'z'))
    assert_tuple_equal(named.first(), first_block)

    typed = DictRDD(list(parts), dtype=(None, None, list))
    typed_first = typed.first()
    assert_tuple_equal(typed_first, first_block)
    assert_is_instance(typed_first[2], list)
def test_tolist(self):
    """Check that ``tolist()`` returns the integers 0..999 in order.

    Exercised with the default block dtype, ``dtype=tuple`` and
    ``dtype=np.array``.
    """
    # Build the expectation as a real list: on Python 3 ``range`` is a lazy
    # sequence object that never compares equal to a list, so comparing the
    # result against a bare ``range(1000)`` always fails there.
    expected = list(range(1000))

    blocked = BlockRDD(self.generate(1000, 5))
    unblocked = blocked.tolist()
    # The source object must still be a BlockRDD after the conversion.
    assert_is_instance(blocked, BlockRDD)
    assert_equal(unblocked, expected)

    blocked = BlockRDD(self.generate(1000, 5), dtype=tuple)
    unblocked = blocked.tolist()
    assert_is_instance(blocked, BlockRDD)
    assert_equal(unblocked, expected)

    blocked = BlockRDD(self.generate(1000, 5), dtype=np.array)
    unblocked = blocked.tolist()
    assert_is_instance(blocked, BlockRDD)
    assert_equal(unblocked, expected)