def merge_tags_rdd(extracted_tags, num_slices=20):
    """Reduce ``extracted_tags`` with ``_merge`` and redistribute the result.

    Args:
        extracted_tags: Object exposing ``reduce``; presumably an RDD of
            tag collections -- TODO confirm against callers.
        num_slices: Value forwarded as ``numSlices`` to ``CTX.parallelize``.
            Defaults to 20, the value that used to be hard-coded here.

    Returns:
        Whatever ``CTX.parallelize`` returns for the merged value
        (presumably an RDD, assuming ``CTX`` is a SparkContext).
    """
    # The reduction yields a single merged value, which is then handed back
    # to CTX.parallelize to be split into ``num_slices`` pieces.
    merged = extracted_tags.reduce(_merge)
    return CTX.parallelize(merged, numSlices=num_slices)
def _assign(line):
    """Return ``(key, max(values))`` for keys that have more than one value.

    ``line`` is passed to ``CTX.parallelize`` and unioned with the global
    ``g_reduced_rdd``. The union is grouped by key, groups holding a single
    value are dropped, and each remaining group is collapsed to its maximum.

    Args:
        line: Collection handed to ``CTX.parallelize``; presumably an
            iterable of ``(key, value)`` pairs, since the result is grouped
            by key -- TODO confirm against callers.

    Returns:
        The result of ``collect()`` on the filtered and mapped data.
    """
    incoming = CTX.parallelize(line)
    grouped = g_reduced_rdd.union(incoming).groupByKey()
    # Keep only keys whose group contains more than one value.
    repeated = grouped.filter(lambda pair: len(pair[1]) > 1)
    largest = repeated.map(lambda pair: (pair[0], max(pair[1])))
    return largest.collect()