コード例 #1
0
def test_cache():
    my_rdd = Context().parallelize([1, 2, 3, 4], 2)
    my_rdd = my_rdd.map(lambda x: x*x).cache()
    print('no exec until here')
    print(my_rdd.first())
    print('executed map on first partition only')
    print(my_rdd.collect())
    print('now map() was executed on all partitions and should '
          'not be executed again')
    print(my_rdd.collect())
    assert len(my_rdd.collect()) == 4 and 16 in my_rdd.collect()
コード例 #2
0
def test_first_partitions():
    my_rdd = Context().parallelize([1, 2, 2, 4, 1, 3, 5, 9], 3)
    print(my_rdd.first())
    assert my_rdd.first() == 1