def test_transform(self):
    """Check ``DictRDD.transform`` with and without a ``column`` argument.

    Every case compares the collected result of ``transform`` against the
    same arithmetic applied by hand to each collected block of the
    untransformed ``DictRDD``.
    """
    left = np.arange(400).reshape((100, 4))
    right = np.arange(200).reshape((100, 2))
    left_rdd = self.sc.parallelize(left, 4)
    right_rdd = self.sc.parallelize(right, 4)
    X = DictRDD(left_rdd.zip(right_rdd), bsize=5)

    # Each entry: (by-hand expectation for one collected block,
    #              callable handed to transform, keyword arguments).
    cases = [
        # No column given: the callable takes and returns both elements.
        (lambda blk: (blk[0], blk[1] ** 2),
         lambda a, b: (a, b ** 2),
         {}),
        # Single column index: only that element is expected to change.
        (lambda blk: (blk[0], blk[1] ** 2),
         lambda x: x ** 2,
         {"column": 1}),
        (lambda blk: (blk[0] ** 2, blk[1]),
         lambda x: x ** 2,
         {"column": 0}),
        # Column list: the callable's arguments and results follow the
        # order of the list, so both orderings must give the same output.
        (lambda blk: (blk[0] ** 2, blk[1] ** 0.5),
         lambda a, b: (a ** 2, b ** 0.5),
         {"column": [0, 1]}),
        (lambda blk: (blk[0] ** 2, blk[1] ** 0.5),
         lambda b, a: (b ** 0.5, a ** 2),
         {"column": [1, 0]}),
    ]

    for by_hand, func, kwargs in cases:
        wanted = [by_hand(blk) for blk in X.collect()]
        transformed = X.transform(func, **kwargs)
        assert_multiple_tuples_equal(wanted, transformed.collect())
def test_transform(self):
    """Exercise ``DictRDD.transform`` over whole tuples and chosen columns.

    The expectation for each case is computed by hand from the collected
    blocks of the original ``DictRDD`` and compared with the collected
    output of ``transform``.
    """
    first = np.arange(400).reshape((100, 4))
    second = np.arange(200).reshape((100, 2))
    first_rdd = self.sc.parallelize(first, 4)
    second_rdd = self.sc.parallelize(second, 4)
    X = DictRDD(first_rdd.zip(second_rdd), bsize=5)

    def check(by_hand, func, **kwargs):
        # Expectation first, then the transform, mirroring a plain
        # "compute X1, compute X2, compare" sequence.
        wanted = [by_hand(blk[0], blk[1]) for blk in X.collect()]
        got = X.transform(func, **kwargs).collect()
        assert_multiple_tuples_equal(wanted, got)

    # Without ``column`` the callable receives both elements of a block.
    check(lambda u, v: (u, v ** 2), lambda a, b: (a, b ** 2))

    # A single column index: the other element is expected unchanged.
    check(lambda u, v: (u, v ** 2), lambda x: x ** 2, column=1)
    check(lambda u, v: (u ** 2, v), lambda x: x ** 2, column=0)

    # A list of columns, in natural and in swapped order; the expected
    # output is the same for both.
    check(lambda u, v: (u ** 2, v ** 0.5),
          lambda a, b: (a ** 2, b ** 0.5),
          column=[0, 1])
    check(lambda u, v: (u ** 2, v ** 0.5),
          lambda b, a: (b ** 0.5, a ** 2),
          column=[1, 0])
def test_get_multiple_items(self):
    """Check ``DictRDD`` indexing with a block selector plus a column selector.

    The data is a (40, 2) array zipped with a length-40 vector and wrapped
    with ``bsize=5``; the expectations below are the first two blocks of
    five rows each.
    """
    features = np.arange(80).reshape((40, 2))
    labels = np.arange(40)
    zipped = self.sc.parallelize(features, 2).zip(
        self.sc.parallelize(labels, 2))
    z = DictRDD(zipped, bsize=5)

    # Expected content of block 0 and block 1, per column.
    x0, y0 = np.arange(0, 10).reshape((5, 2)), np.arange(0, 5)
    x1, y1 = np.arange(10, 20).reshape((5, 2)), np.arange(5, 10)

    # A scalar column index is expected to yield bare arrays.
    assert_array_equal(z[:2, 1].collect(), [y0, y1])
    assert_array_equal(z[[0, 1], 0].collect(), [x0, x1])

    # A list or slice column selector is expected to yield tuples.
    only_second = [(y0,), (y1,)]
    assert_multiple_tuples_equal(z[[0, 1], [1]].collect(), only_second)
    assert_multiple_tuples_equal(z[[0, 1], -1:].collect(), only_second)

    # Both blocks and columns requested in reverse order.
    assert_multiple_tuples_equal(z[[1, 0], [1, 0]].collect(),
                                 [(y1, x1), (y0, x0)])
def test_get_multiple_items(self):
    """Verify ``DictRDD.__getitem__`` for (blocks, columns) index pairs.

    Built from a (40, 2) array and a length-40 vector with ``bsize=5``;
    only the first two blocks (five rows each) are inspected.
    """
    x = np.arange(80).reshape((40, 2))
    y = np.arange(40)
    x_rdd = self.sc.parallelize(x, 2)
    y_rdd = self.sc.parallelize(y, 2)
    z = DictRDD(x_rdd.zip(y_rdd), bsize=5)

    # Blocks 0 and 1 as (x_block, y_block) tuples.
    expected = [
        (np.arange(10 * i, 10 * (i + 1)).reshape((5, 2)),
         np.arange(5 * i, 5 * (i + 1)))
        for i in range(2)
    ]
    x_blocks = [blk[0] for blk in expected]
    y_blocks = [blk[1] for blk in expected]
    y_tuples = [(yb,) for yb in y_blocks]

    # Integer column index: compared as plain arrays.
    assert_array_equal(z[:2, 1].collect(), y_blocks)
    assert_array_equal(z[[0, 1], 0].collect(), x_blocks)

    # List / slice column index: compared as one-element tuples.
    assert_multiple_tuples_equal(z[[0, 1], [1]].collect(), y_tuples)
    assert_multiple_tuples_equal(z[[0, 1], -1:].collect(), y_tuples)

    # Reversed block order combined with reversed column order.
    flipped = [blk[::-1] for blk in expected[::-1]]
    assert_multiple_tuples_equal(z[[1, 0], [1, 0]].collect(), flipped)
def test_get_multiple_tuples(self):
    """Check ``DictRDD`` indexing that selects several whole blocks.

    Blocks are selected by slice and by index list, with and without a
    trailing ``:`` column selector, from the front and from the back of
    the RDD, and in permuted order.
    """
    features = np.arange(80).reshape((40, 2))
    labels = np.arange(40)
    features_rdd = self.sc.parallelize(features, 2)
    labels_rdd = self.sc.parallelize(labels, 2)
    z = DictRDD(features_rdd.zip(labels_rdd), bsize=5)

    def block(i):
        # Expected i-th block: five consecutive rows of each input.
        return (np.arange(10 * i, 10 * i + 10).reshape((5, 2)),
                np.arange(5 * i, 5 * i + 5))

    # np.s_ turns subscript syntax into the key object that __getitem__
    # would receive, so z[key] below equals the literal z[...] form.
    head = [block(0), block(1)]
    for key in (np.s_[:2], np.s_[:2, :], np.s_[[0, 1]], np.s_[[0, 1], :]):
        assert_multiple_tuples_equal(z[key].collect(), head)
    assert_multiple_tuples_equal(z[[1, 0]].collect(), head[::-1])

    # The last three blocks; the test expects indices 5, 6, 7 to match -3:.
    tail = [block(5), block(6), block(7)]
    for key in (np.s_[-3:], np.s_[-3:, :],
                np.s_[[5, 6, 7]], np.s_[[5, 6, 7], :]):
        assert_multiple_tuples_equal(z[key].collect(), tail)
    for key in (np.s_[[7, 6, 5]], np.s_[[7, 6, 5], :]):
        assert_multiple_tuples_equal(z[key].collect(), tail[::-1])

    # Arbitrary permutation of the selected blocks.
    assert_multiple_tuples_equal(z[[5, 7, 6]].collect(),
                                 [tail[0], tail[2], tail[1]])