def test_initialization(self):
    """Check which inputs the ArrayRDD constructor accepts.

    A plain Python list of arrays must be rejected with TypeError, with or
    without a second positional argument, while the object returned by
    ``self.sc.parallelize`` must produce an ArrayRDD instance.
    """
    num_slices = 4
    num_records = 100
    local_arrays = [np.array([1, 2]) for _ in range(num_records)]
    distributed = self.sc.parallelize(local_arrays, num_slices)

    # Local list input: every call signature is expected to fail.
    for extra_args in ((), (False,), (10,)):
        assert_raises(TypeError, ArrayRDD, local_arrays, *extra_args)

    # Parallelized input: every call signature is expected to succeed.
    for extra_args in ((), (10,), (None,)):
        assert_is_instance(ArrayRDD(distributed, *extra_args), ArrayRDD)
def test_initialization(self):
    """Check which inputs the DictRDD constructor accepts.

    A plain Python list of tuples must be rejected with TypeError for every
    ``bsize`` setting, while the object returned by ``self.sc.parallelize``
    must produce an instance of both DictRDD and BlockRDD for every
    ``bsize`` setting.
    """
    n_partitions = 4
    n_samples = 100
    data = [(1, 2) for _ in range(n_samples)]
    rdd = self.sc.parallelize(data, n_partitions)

    # Local list input is rejected no matter which bsize is requested.
    assert_raises(TypeError, DictRDD, data)
    assert_raises(TypeError, DictRDD, data, bsize=False)
    assert_raises(TypeError, DictRDD, data, bsize=10)

    # Check the *same* instance against both classes for each bsize variant.
    # Previously the BlockRDD check was always run on DictRDD(rdd), so the
    # bsize=10 and bsize=None variants were never verified against BlockRDD.
    for kwargs in ({}, {"bsize": 10}, {"bsize": None}):
        instance = DictRDD(rdd, **kwargs)
        assert_is_instance(instance, DictRDD)
        assert_is_instance(instance, BlockRDD)
def test_empty(self):
    """Calling ``block`` on a parallelized empty list must raise ValueError."""
    num_slices = 3
    no_records = self.sc.parallelize([], num_slices)
    assert_raises(ValueError, block, no_records)