Code Example #1
File: aggregate.py  Project: sunrenjie/spark-pytools
from spark_pytools.jobs.init import sc

nums = sc.parallelize([1, 3, 5, 9])
# Fold each partition into a running (sum, count) pair, then merge the partial pairs.
sumCount = nums.aggregate((0, 0),
    (lambda acc, value: (acc[0] + value, acc[1] + 1)),            # seqOp: fold one value into (sum, count)
    (lambda acc1, acc2: (acc1[0] + acc2[0], acc1[1] + acc2[1])))  # combOp: merge two partial pairs

print(sumCount[0] / float(sumCount[1]))  # average of the values: 4.5
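
The aggregate() call above is a two-level fold: the first lambda (seqOp) folds each value into a per-partition (sum, count) pair, and the second (combOp) merges the per-partition pairs. A minimal local sketch of the same logic in plain Python, no Spark required (the two-way partition split is arbitrary, for illustration only):

from functools import reduce

seq_op = lambda acc, value: (acc[0] + value, acc[1] + 1)
comb_op = lambda a, b: (a[0] + b[0], a[1] + b[1])

partitions = [[1, 3], [5, 9]]                                     # pretend Spark split the data this way
partials = [reduce(seq_op, part, (0, 0)) for part in partitions]  # [(4, 2), (14, 2)]
total = reduce(comb_op, partials, (0, 0))                         # (18, 4)
print(total[0] / float(total[1]))                                 # 4.5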
Code Example #2
# Compute the per-key average as in Example 4-12.
from spark_pytools.jobs.init import sc

nums = sc.parallelize(((1, 2), (3, 4), (3, 6)))
sum_count = nums.combineByKey(
    (lambda x: (x, 1)),                        # createCombiner: first value for a key -> (sum, count)
    (lambda x, y: (x[0] + y, x[1] + 1)),       # mergeValue: fold another value into (sum, count)
    (lambda x, y: (x[0] + y[0], x[1] + y[1]))  # mergeCombiners: merge two partial (sum, count) pairs
)
print(sum_count.mapValues(lambda p: float(p[0]) / p[1]).collectAsMap())
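
For this input the printed result is {1: 2.0, 3: 5.0}: key 3 accumulates (4 + 6, 2) and key 1 keeps (2, 1). The same per-key average can also be expressed with mapValues() plus reduceByKey(); a short sketch, assuming the same sc imported above:

pairs = sc.parallelize([(1, 2), (3, 4), (3, 6)])
sum_count = pairs.mapValues(lambda v: (v, 1)) \
                 .reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1]))
print(sum_count.mapValues(lambda p: float(p[0]) / p[1]).collectAsMap())  # {1: 2.0, 3: 5.0}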