예제 #1
0
def _union(rdd: RDD, other: RDD, func):
    num_partition = max(rdd.getNumPartitions(), other.getNumPartitions())

    def _func(pair):
        iter1, iter2 = pair
        val1 = list(iter1)
        val2 = list(iter2)
        if not val1:
            return val2[0]
        if not val2:
            return val1[0]
        return func(val1[0], val2[0])

    return _map_value(rdd.cogroup(other, num_partition), _func)