Ejemplo n.º 1
0
from sspark.Context import Context
from operator import add

if __name__ == "__main__":
    # Demo: key each element of a small RDD by its square and display the
    # collected result.
    print("RUNNING...")
    sc = Context()
    rdd = sc.parallelize([1, 2, 3, 4, 5])
    # keyBy() is given a function computing the key (x * x) for each element.
    # NOTE(review): presumably mirrors PySpark's RDD.keyBy and yields
    # (key, value) tuples -- confirm against the sspark implementation.
    keyed = rdd.keyBy(lambda x: x * x).collect()
    # Print the collected list; without this the script would show nothing
    # beyond the banner, which defeats the purpose of an example.
    print(keyed)
Ejemplo n.º 2
0
from sspark.Context import Context
from operator import add

if __name__ == "__main__":
    # Demo: count how many records share each key in a small pair RDD.
    print("RUNNING...")
    sc = Context()
    pairs = [("a", -1), ("b", 0), ("a", 1)]
    # countByKey() returns an object exposing items(); its entries are sorted
    # before printing so the displayed order is stable.
    counts = sc.parallelize(pairs).countByKey()
    ordered_counts = sorted(counts.items())
    print(ordered_counts)
Ejemplo n.º 3
0
from sspark.Context import Context
from operator import add

if __name__ == "__main__":
    # Demo: join two keyed RDDs on their keys and display the matched pairs.
    print("JOIN sample...")
    sc = Context()
    x = sc.parallelize([("a", 1), ("b", 4)])
    y = sc.parallelize([("a", 2), ("a", 3)])
    # Sort the collected pairs so the printed order is stable.
    joined = sorted(x.join(y).collect())
    # Print the result; a bare expression is only echoed in an interactive
    # session, so a script must print explicitly.
    # Expected output: [('a', (1, 2)), ('a', (1, 3))]
    print(joined)