class EmptyTest(unittest.TestCase):
    """Verify how each RDD operation behaves when the RDD holds no data.

    map/reduce are expected to reject empty input with ValueError, while
    join/keyBy/countByKey simply produce empty results.
    """

    def setUp(self):
        self.sc = Context()

    def tearDown(self):
        pass

    def test_empty_map(self):
        # map must fail when there is no data to operate on
        with self.assertRaises(ValueError):
            self.sc.parallelize([]).map(add)

    def test_empty_reduce(self):
        # reduce must fail when there is no data to operate on
        with self.assertRaises(ValueError):
            self.sc.parallelize([]).reduce(add)

    def test_empty_join(self):
        # joining an empty RDD with a populated one yields nothing
        empty_rdd = self.sc.parallelize([])
        other_rdd = self.sc.parallelize([("a", "two"), ("b", 3), ("c", 1)])
        self.assertEqual(empty_rdd.join(other_rdd).collect(), [])

    def test_empty_keyby(self):
        # keyBy over no elements never invokes the key function
        keyed = self.sc.parallelize([]).keyBy(lambda x: x + x)
        self.assertEqual(keyed.collect(), [])

    def test_empty_countByKey(self):
        # counting keys of an empty RDD yields an empty mapping
        counts = self.sc.parallelize([]).countByKey()
        self.assertEqual(list(counts.items()), [])
class InvalidInputTest(unittest.TestCase):
    """Verify that RDD operations fed incompatible functions raise TypeError.

    Each test pairs data with a function that cannot legally be applied to
    it, and expects the framework to surface the resulting TypeError.
    """

    def setUp(self):
        self.sc = Context()

    def tearDown(self):
        pass

    def test_map_func_with_two_args(self):
        # add() needs two positional arguments, but map supplies only one
        # element at a time, so the call must raise TypeError.
        with self.assertRaises(TypeError):
            self.sc.parallelize([1, 2, 3]).map(add)

    def test_map_invalid_arguments(self):
        # abs() is undefined for strings, so mapping it over str data fails.
        with self.assertRaises(TypeError):
            self.sc.parallelize(["a", "b"]).map(abs)

    def test_reduce_invalid(self):
        # FIX: `mul` was referenced but never imported at module level (the
        # file only imports `add`), so this test previously died with a
        # NameError instead of exercising the intended TypeError from
        # multiplying two strings. Import it locally to make the test valid.
        from operator import mul
        with self.assertRaises(TypeError):
            self.sc.parallelize([("a", "apple"), ("a", "art")]).reduce(mul)
class FuncationalityTest(unittest.TestCase):
    """Happy-path tests for the core RDD operations.

    Establishes the framework's contracts: map pairs each element with the
    function's result, reduce folds values with the given operator, keyBy
    keys elements, countByKey tallies per key, and join matches on keys.
    (Class name typo is preserved to keep the public identifier stable.)
    """

    def setUp(self):
        self.sc = Context()

    def tearDown(self):
        pass

    def test_parralelize(self):
        # parallelize stores the data unchanged
        source = [1, 2, 3]
        self.assertEqual(self.sc.parallelize(source).getData(), [1, 2, 3])

    def test_collect(self):
        # collect returns the data as-is, mixed types included
        pairs = [(1, 3), (3, 2), ('a', 3)]
        self.assertEqual(self.sc.parallelize(pairs).collect(), pairs)

    def test_map(self):
        # map emits (element, f(element)) pairs; identity makes that visible
        mapped = self.sc.parallelize([1, "apple", 0.3]).map(lambda x: x)
        self.assertEqual(
            mapped.collect(),
            [(1, 1), ('apple', 'apple'), (0.3, 0.3)],
        )

    def test_reduce(self):
        # summing 0..5 with operator.add folds down to 15
        summed = self.sc.parallelize(list(range(0, 6))).reduce(add)
        self.assertEqual(summed.collect(), 15)

    def test_keyBy(self):
        # keyBy produces (key_fn(x), x) pairs; abs keys the negatives
        keyed = self.sc.parallelize(list(range(-4, 2))).keyBy(abs)
        self.assertEqual(
            keyed.collect(),
            [(-4, 4), (-3, 3), (-2, 2), (-1, 1), (0, 0), (1, 1)],
        )

    def test_countByKey(self):
        # two fruit entries and one animal entry are tallied per key
        data = [("fruit", "apple"), ("animal", "dog"), ("fruit", "orange")]
        counts = self.sc.parallelize(data).countByKey()
        self.assertEqual(
            sorted(counts.items()),
            [("animal", 1), ("fruit", 2)],
        )

    def test_join(self):
        # matching keys pair up their values as (left_value, right_value)
        left = self.sc.parallelize([("a", 1), ("b", 2), ("c", 0)])
        right = self.sc.parallelize([("a", "two"), ("b", 3), ("c", 1)])
        self.assertEqual(
            left.join(right).collect(),
            [("a", (1, "two")), ("b", (2, 3)), ("c", (0, 1))],
        )
'''
Created on Nov 16, 2015

@author: Saeed Zareian
'''
# FIX: import from the public `operator` module; `_operator` is the private
# C accelerator and not a supported import target.
from operator import add

from sspark import Context

if __name__ == '__main__':
    # Word-count demo: count how many words the sample text contains.
    print("Word Count example...")
    sc = Context()
    text = """Hello Diederik I am Saeed and I want to demonstrate you
    the way my framework handles wordcount example. This example is
    a classic example of big data solutions """
    text = text.lower()
    # FIX: newlines must become spaces, not be deleted outright — replacing
    # "\n" with "" glued the last word of each line onto the first word of
    # the next line, corrupting the token list.
    text = text.replace("\n", " ")
    # Drop the empty tokens that consecutive separators leave behind.
    words = [w for w in text.split(" ") if w.strip()]
    rdd = sc.parallelize(words)
    # Map every word to 1 and fold with add to get the total word count.
    # (Per the framework's contract, map emits (element, f(element)) pairs
    # and reduce folds the values.)
    total = rdd.map(lambda x: 1).reduce(add).collect()
    # FIX: the result was previously computed and silently discarded; a demo
    # that prints a banner should also show its answer.
    print("Total words:", total)