def merge_tags_rdd(extracted_tags, num_slices=20):
    """Fold ``extracted_tags`` into one merged value and parallelize it.

    The elements of ``extracted_tags`` are combined with ``_merge`` via
    ``reduce``; the single merged result is then handed to
    ``CTX.parallelize`` to build a new distributed collection.

    Args:
        extracted_tags: Object exposing ``reduce(func)``; presumably a
            Spark RDD -- confirm against callers.
        num_slices: Forwarded to ``CTX.parallelize`` as ``numSlices``.
            Defaults to 20, the value that used to be hard-coded here.

    Returns:
        Whatever ``CTX.parallelize`` returns for the merged value.
    """
    # NOTE(review): the merged value is passed straight to parallelize, so
    # ``_merge`` presumably returns a collection -- confirm.
    # NOTE(review): an empty ``extracted_tags`` is not handled here; Spark's
    # ``reduce`` is expected to raise in that case -- confirm.
    merged = extracted_tags.reduce(_merge)
    return CTX.parallelize(merged, numSlices=num_slices)
def _assign(line):
    """Return keys that have more than one grouped value, with their maximum.

    ``line`` is parallelized and unioned with the module-level
    ``g_reduced_rdd``. The combined records are grouped by key, keys whose
    group holds a single value are dropped, and each remaining key is
    paired with the maximum of its grouped values.

    Args:
        line: Collection handed to ``CTX.parallelize``; presumably
            ``(key, value)`` pairs, since the union is grouped by key --
            confirm against callers.

    Returns:
        The result of ``collect()`` on the ``(key, max_value)`` records.
    """

    def _has_multiple_values(pair):
        # pair is (key, grouped_values); keep keys seen more than once.
        return len(pair[1]) > 1

    def _key_with_max(pair):
        return (pair[0], max(pair[1]))

    incoming = CTX.parallelize(line)
    combined = g_reduced_rdd.union(incoming)
    grouped = combined.groupByKey()
    survivors = grouped.filter(_has_multiple_values)
    return survivors.map(_key_with_max).collect()