from sspark.Context import Context from operator import add if __name__ == "__main__": print("RUNNING..."); sc = Context() rdd = sc.parallelize([1,2,3,4,5]) rdd.keyBy(lambda x: x*x).collect()
from sspark.Context import Context from operator import add if __name__ == "__main__": print("RUNNING..."); sc = Context() rdd = sc.parallelize([("a", -1), ("b",0), ("a", 1)]) print(sorted(rdd.countByKey().items()))
from sspark.Context import Context from operator import add if __name__ == "__main__": print("JOIN sample..."); sc = Context() x = sc.parallelize([("a", 1), ("b", 4)]) y = sc.parallelize([("a", 2), ("a", 3)]) sorted(x.join(y).collect()) '''[('a', (1, 2)), ('a', (1, 3))]'''