def f(record):
    """Count the words of one document's summary, tagged with its URL.

    The summary is stripped of ASCII punctuation and of the CJK/full-width
    punctuation listed in the pattern below, ASCII digit runs are turned
    into spaces, the text is lower-cased and finally split on whitespace.

    All processing happens on unicode text. Encoding to UTF-8 first would
    make the CJK punctuation pattern run against raw bytes, where it can
    never match a multi-byte character.

    Args:
        record: A ``(key, document)`` pair. ``document`` must be a mapping
            providing ``'summary'`` (text, or UTF-8 encoded bytes) and
            ``'url'``.

    Returns:
        A list of ``(word, {url: count})`` tuples, one per distinct word.
        An empty summary yields an empty list.

    Raises:
        KeyError: If ``document`` lacks ``'summary'`` or ``'url'``.
    """
    document = record[1]
    text = document['summary']
    if isinstance(text, bytes):
        # Normalise byte input so every later step sees unicode text.
        text = text.decode("utf-8")

    # Delete ASCII punctuation (ordinal -> None removes the character).
    text = text.translate(dict.fromkeys(map(ord, string.punctuation)))
    # Delete CJK / full-width punctuation.
    text = re.sub(u"[+——!,。?、~@#¥%……&*()]+", u"", text)
    # ASCII digit runs act as word separators.
    text = re.sub(u"[0-9]+", u" ", text).lower()

    url = document['url']
    # split() with no argument already treats newlines and any other
    # whitespace run as a single separator.
    counts = collections.Counter(text.split())
    return [(word, {url: count}) for word, count in counts.items()]


def reduce_values(record):
    """Merge the per-URL counts collected for one key.

    Args:
        record: A ``(key, dicts)`` pair where ``dicts`` is an iterable of
            ``{url: count}`` mappings.

    Returns:
        A one-element list ``[(key, {url: total_count})]``, so the function
        can be used directly with ``flatMap``.
    """
    key, partial_counts = record[0], record[1]
    totals = {}
    for partial in partial_counts:
        # Walk every entry: indexing ``items()[0]`` would silently drop any
        # additional entries and does not work on Python 3 dict views.
        for url, count in partial.items():
            totals[url] = totals.get(url, 0) + count
    return [(key, totals)]

# Driver: build a per-word index of {url: count} from the documents in
# testmr.test_in and store it in testmr.test_out.

# Read the input collection as a pair RDD; f() reads record[1]['summary']
# and record[1]['url'] from each record.
rdd = sc.mongoPairRDD("mongodb://localhost/testmr.test_in")
# Expand every document into (word, {url: count}) pairs.
newrdd = rdd.flatMap(f)
# NOTE(review): groupByKey() below presumably reshuffles the data, so this
# sort may be redundant -- confirm before removing.
sortrdd = newrdd.sortByKey()
# Collect all {url: count} dicts per word and merge them into one dict.
resultrdd = sortrdd.groupByKey().flatMap(reduce_values)
# Write the (word, {url: total}) pairs to the output collection.
resultrdd.saveToMongoDB('mongodb://localhost:27017/testmr.test_out')