def reducefn(author, titles):
    from stopwords import allStopWords
    from utils import normalize_words
    terms_freq = {}
    for title in titles:
        ### Normalize the words (lower-case, strip punctuation and hyphens),
        ### drop stopwords and single-letter words, and accumulate the cumulative frequency.
        title_terms = normalize_words(title)
        for term in title_terms.split():
            ## allStopWords comes from the stopwords module;
            ## single-letter words are skipped as well.
            if term not in allStopWords and len(term) > 1:
                terms_freq[term] = terms_freq.get(term, 0) + 1
    ## Reduce result: term frequencies over all of this author's titles.
    return terms_freq
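## For reference, the counting above is equivalent to the following
## collections.Counter sketch (not used by the job itself, shown only to
## illustrate the cumulative-frequency step):
##
##     from collections import Counter
##     counts = Counter()
##     for title in titles:
##         counts.update(t for t in normalize_words(title).split()
##                       if t not in allStopWords and len(t) > 1)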
def mapfn(filenumber, filecontent):
    from utils import normalize_words
    ## Emit the concatenated titles of every author found in the file.
    author_contents = {}
    for line in filecontent.splitlines():
        ## Each line has the form: conf:::author_1::author_2:::title
        docdata = line.split(':::')
        authors_list = docdata[1].split('::')
        title = docdata[-1]
        for author in authors_list:
            author = normalize_words(author)
            if author in author_contents:
                author_contents[author] = author_contents[author] + " " + title
            else:
                author_contents[author] = title
    ## Map results: one (author, titles) pair per author.
    for author in author_contents:
        yield author, author_contents[author]
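## Minimal local driver: a sketch that chains mapfn and reducefn in-process so
## the pipeline can be sanity-checked without any MapReduce framework. The
## sample line below is hypothetical and only mirrors the expected
## conf:::author_1::author_2:::title format.
if __name__ == '__main__':
    sample = "SIGMOD:::Jane Doe::John Smith:::Scalable Graph Processing"
    ## Map phase: group every value emitted by mapfn under its key (the author).
    grouped = {}
    for author, titles in mapfn(0, sample):
        grouped.setdefault(author, []).append(titles)
    ## Reduce phase: per-author term frequencies over the grouped titles.
    results = dict((author, reducefn(author, titles))
                   for author, titles in grouped.items())
    print(results)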