def wordCountMapper(document):
    """Map step of word count: emit a ``(word, 1)`` pair per token.

    The document is split with ``tokenize`` (defined elsewhere in this
    module); one pair is yielded for every token it produces, in order,
    so a word that occurs several times is emitted several times.
    """
    yield from ((token, 1) for token in tokenize(document))
def wordCount_old(documents):
    """Count words across all documents without going through MapReduce.

    Every document is split with ``tokenize`` (defined elsewhere in this
    module) and each resulting token is tallied directly.

    Returns a ``Counter`` mapping each word to its total number of
    occurrences over all of ``documents``.
    """
    totals = Counter()
    for text in documents:
        for token in tokenize(text):
            totals[token] += 1
    return totals