Esempio n. 1
0
class EmptyTest(unittest.TestCase):
    """Exercise RDD operations on an RDD built from an empty list."""

    def setUp(self):
        # Every test method starts from its own fresh Context.
        self.sc = Context()

    def tearDown(self):
        pass

    def test_empty_map(self):
        # Mapping over an RDD built from [] is expected to raise ValueError.
        # The RDD is created inside the block so that an error raised by
        # parallelize itself is caught by the same assertion.
        with self.assertRaises(ValueError):
            empty = self.sc.parallelize([])
            empty.map(add)

    def test_empty_reduce(self):
        # Reducing an RDD built from [] is expected to raise ValueError.
        with self.assertRaises(ValueError):
            empty = self.sc.parallelize([])
            empty.reduce(add)

    def test_empty_join(self):
        # Joining an empty RDD with a populated one should collect to [].
        left = self.sc.parallelize([])
        right = self.sc.parallelize([("a", "two"), ("b", 3), ("c", 1)])
        joined = left.join(right).collect()
        self.assertEqual(joined, [])

    def test_empty_keyby(self):
        # keyBy on an empty RDD should collect to [].
        keyed = self.sc.parallelize([]).keyBy(lambda x: x + x)
        self.assertEqual(keyed.collect(), [])

    def test_empty_countByKey(self):
        # countByKey on an empty RDD should give a mapping with no items.
        counts = self.sc.parallelize([]).countByKey()
        self.assertEqual(list(counts.items()), [])
class InvalidInputTest(unittest.TestCase):
    """Check that RDD operations given unsuitable functions raise TypeError."""

    def setUp(self):
        # Every test method starts from its own fresh Context.
        self.sc = Context()

    def tearDown(self):
        pass

    def test_map_func_with_two_args(self):
        # `add` is handed to map over plain numbers; the test expects a
        # TypeError from this combination.
        with self.assertRaises(TypeError):
            numbers = self.sc.parallelize([1, 2, 3])
            numbers.map(add)

    def test_map_invalid_arguments(self):
        # `abs` is mapped over strings; the test expects a TypeError.
        with self.assertRaises(TypeError):
            letters = self.sc.parallelize(["a", "b"])
            letters.map(abs)

    def test_reduce_invalid(self):
        # `mul` is used to reduce (str, str) tuples; the test expects a
        # TypeError.
        with self.assertRaises(TypeError):
            pairs = self.sc.parallelize([("a", "apple"), ("a", "art")])
            pairs.reduce(mul)
            
class FuncationalityTest(unittest.TestCase):
    """Check the results of the RDD operations on small, non-empty inputs."""

    def setUp(self):
        # Every test method starts from its own fresh Context.
        self.sc = Context()

    def tearDown(self):
        pass

    def test_parralelize(self):
        # getData() should hand back the values given to parallelize.
        stored = self.sc.parallelize([1, 2, 3]).getData()
        self.assertEqual(stored, [1, 2, 3])

    def test_collect(self):
        # collect() should return the elements unchanged and in order.
        collected = self.sc.parallelize([(1, 3), (3, 2), ('a', 3)]).collect()
        self.assertEqual(collected, [(1, 3), (3, 2), ('a', 3)])

    def test_map(self):
        # With the identity function, each collected entry is expected to be
        # an (element, element) pair.
        mapped = self.sc.parallelize([1, "apple", 0.3]).map(lambda x: x)
        self.assertEqual(
            mapped.collect(),
            [(1, 1), ('apple', 'apple'), (0.3, 0.3)],
        )

    def test_reduce(self):
        # 0 + 1 + 2 + 3 + 4 + 5 == 15
        numbers = list(range(0, 6))
        total = self.sc.parallelize(numbers).reduce(add).collect()
        self.assertEqual(total, 15)

    def test_keyBy(self):
        # keyBy(abs) is expected to pair each element with its absolute value.
        numbers = list(range(-4, 2))
        keyed = self.sc.parallelize(numbers).keyBy(abs).collect()
        self.assertEqual(
            keyed,
            [(-4, 4), (-3, 3), (-2, 2), (-1, 1), (0, 0), (1, 1)],
        )

    def test_countByKey(self):
        # Items are sorted before comparing so the check does not depend on
        # the ordering of the returned mapping.
        pairs = [("fruit", "apple"), ("animal", "dog"), ("fruit", "orange")]
        counts = self.sc.parallelize(pairs).countByKey()
        self.assertEqual(sorted(counts.items()), [("animal", 1), ("fruit", 2)])

    def test_join(self):
        # Values sharing a key are expected to be combined into a tuple.
        left = self.sc.parallelize([("a", 1), ("b", 2), ("c", 0)])
        right = self.sc.parallelize([("a", "two"), ("b", 3), ("c", 1)])
        joined = left.join(right).collect()
        self.assertEqual(
            joined,
            [("a", (1, "two")), ("b", (2, 3)), ("c", (0, 1))],
        )
Esempio n. 4
0
'''
Created on Nov 16, 2015

@author: Saeed Zareian
'''
from _operator import add

from sspark import Context


if __name__ == '__main__':
    # Demo script: tokenize a short text and run a map/reduce over the words.
    print("Word Count example...")
    sc = Context()

    raw_text = """Hello Diederik I am Saeed and I want to demonstrate
    you the way my framework handles wordcount example. This example is a
    classic example of big data solutions """

    # Lower-case the text and drop the newlines; the indentation that follows
    # each newline in the literal keeps neighbouring words separated.
    flattened = raw_text.lower().replace("\n", "")

    # Splitting on a single space yields empty strings wherever several
    # spaces were adjacent, so those tokens are filtered out.
    words = [token for token in flattened.split(" ") if token not in ("", " ")]

    rdd = sc.parallelize(words)

    # Map every word to 1 and combine the values with `add`.
    # NOTE: the collected result is neither stored nor printed.
    rdd.map(lambda x: 1).reduce(add).collect()