def test_sortbykey(self):
    """Check the key order returned by ``sortByKey`` for 2-tuple and 1-tuple keys."""
    # Two-element keys, fed in with the second element varying fastest.
    pairRecords = [((row, col), array([0]))
                   for row in range(2)
                   for col in range(3)]
    pairData = Data(self.sc.parallelize(pairRecords))
    sortedPairKeys = pairData.sortByKey().keys().collect()
    # The expected order has the FIRST key element varying fastest.
    expectedPairKeys = [(0, 0), (1, 0), (0, 1), (1, 1), (0, 2), (1, 2)]
    assert array_equal(sortedPairKeys, expectedPairKeys)

    # One-element keys are expected back in ascending order.
    singleRecords = [((idx, ), array([0])) for idx in range(3)]
    singleData = Data(self.sc.parallelize(singleRecords))
    sortedSingleKeys = singleData.sortByKey().keys().collect()
    assert array_equal(sortedSingleKeys, [(0, ), (1, ), (2, )])
def setUp(self):
    """Build a four-record float32 fixture, kept both locally and as a ``Data`` object."""
    super(TestSeriesGetters, self).setUp()
    # (key, raw values) pairs; the values are converted to float32 arrays below.
    rawRecords = [
        ((0, 0), [1.0, 2.0, 3.0]),
        ((0, 1), [2.0, 2.0, 4.0]),
        ((1, 0), [4.0, 2.0, 1.0]),
        ((1, 1), [3.0, 1.0, 1.0]),
    ]
    self.dataLocal = [(key, array(values, dtype='float32'))
                      for key, values in rawRecords]
    distributed = self.sc.parallelize(self.dataLocal)
    self.series = Data(distributed, dtype='float32')
def test_range_tuple_key(self):
    """Check ``range`` with tuple-valued start and end keys."""
    allKeys = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
    # Each record's value is a one-element array holding its position (0..5).
    records = [(key, array([position])) for position, key in enumerate(allKeys)]
    data = Data(self.sc.parallelize(records))

    selected = data.range((0, 0), (1, 1))
    selectedKeys = selected.collectKeysAsArray()
    # The test expects the end key (1, 1) itself to be left out of the result.
    expectedKeys = [(0, 0), (0, 1), (0, 2), (1, 0)]
    assert array_equal(selectedKeys, expectedKeys)
def test_collect(self):
    """Check that keys and values can each be collected as an array."""
    allKeys = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
    # Each record's value is a one-element array holding its position (0..5).
    records = [(key, array([position])) for position, key in enumerate(allKeys)]
    data = Data(self.sc.parallelize(records))

    expectedKeys = [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2]]
    collectedKeys = data.collectKeysAsArray()
    assert array_equal(collectedKeys, expectedKeys)

    expectedValues = [[0], [1], [2], [3], [4], [5]]
    collectedValues = data.collectValuesAsArray()
    assert array_equal(collectedValues, expectedValues)
def setUp(self):
    """Create one value, one RDD and one ``Data`` object per named casting case.

    For every case ``name`` this sets ``self.<name>`` (the raw value),
    ``self.<name>RDD`` (a single-record RDD keyed by 0) and
    ``self.<name>Data`` (a ``Data`` wrapper around that RDD).  The case
    names are also stored, in order, in ``self.allCases``.
    """
    super(TestCasting, self).setUp()
    # The largest float16 value is 6.55040e+04 (see np.finfo(np.float16)),
    # so the "*Big*" cases hold values that cannot be cast down to float16 safely.
    cases = [
        ('float32Array', array([1.1, 2.2], dtype='float32')),
        ('float32BigArray', array([1.1e+05, 2.2e+05], dtype='float32')),
        ('float32Scalar', float32(1.1)),
        ('float32BigScalar', float32(4.4e+05)),
        ('pythonFloatScalar', 1.1),
        ('pythonFloatBigScalar', 5.5e+05),
    ]
    for caseName, caseValue in cases:
        caseRdd = self.sc.parallelize([(0, caseValue)])
        caseData = Data(caseRdd, nrecords=1, dtype='float32')
        setattr(self, caseName, caseValue)
        setattr(self, caseName + "RDD", caseRdd)
        setattr(self, caseName + "Data", caseData)
    self.allCases = [caseName for caseName, _ in cases]
def test_range_int_key(self):
    """Check ``range`` with integer keys for several end values."""
    # Six records keyed 0..5, each holding its own key as a one-element array.
    records = [(key, array([key])) for key in range(6)]
    data = Data(self.sc.parallelize(records))

    # (end key, keys expected back); in every case the end key is not returned.
    expectations = (
        (2, [0, 1]),
        (5, [0, 1, 2, 3, 4]),
        (6, [0, 1, 2, 3, 4, 5]),
    )
    for stop, expectedKeys in expectations:
        selectedKeys = data.range(0, stop).collectKeysAsArray()
        assert array_equal(selectedKeys, expectedKeys)
def setUp(self):
    """Build a two-record int16 fixture of 2x2 arrays keyed 0 and 1."""
    super(TestImagesGetters, self).setUp()
    self.ary1 = array([[1, 2], [3, 4]], dtype='int16')
    self.ary2 = array([[5, 6], [7, 8]], dtype='int16')
    # Pair each array with its integer key: [(0, ary1), (1, ary2)].
    keyedArrays = list(enumerate([self.ary1, self.ary2]))
    self.images = Data(self.sc.parallelize(keyedArrays), dtype='int16')