def print_top(filename):
    """Print the three most frequent words in *filename* with their counts.

    Words are counted by ``count_words`` (case-insensitive, whitespace
    separated).  One ``word count`` line is printed per entry, most frequent
    first.  If the file contains fewer than three distinct words, only the
    words that exist are printed.

    The ``times`` object used for timing is expected to be defined elsewhere
    in this module; its calls appear to bracket the sorting phase and then
    report the last segment -- NOTE(review): confirm against its definition.
    """
    word_count = count_words(filename)
    word_count_pairs = word_count.items()

    times.seg_start('sorting start')
    # Sort (word, count) pairs by count, highest first.
    word_count_pairs = sorted(
        word_count_pairs, key=lambda pair: pair[-1], reverse=True)
    times.seg_stop('sorting stop')

    # Slicing instead of indexing 0..2 avoids an IndexError when the file
    # holds fewer than three distinct words.
    for word, count in word_count_pairs[:3]:
        # A single pre-formatted argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % (word, count))

    times.last_seg()
def count_words(filename):
    """Return a dict mapping each lowercased word in *filename* to its count.

    The file is read line by line and each line is split on whitespace, so
    punctuation attached to a word is kept as part of that word.

    The ``times`` object used for timing is expected to be defined elsewhere
    in this module -- NOTE(review): confirm the semantics of
    ``seg_start``/``seg_stop`` against its definition.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    times.seg_start('count words start')
    word_count = {}

    # The 'with' block guarantees the file is closed, even on error.
    # Plain 'r' replaces the old 'rU' mode (rejected by Python 3.11+);
    # the counts are unaffected because split() treats every newline
    # variant as whitespace.
    with open(filename, 'r') as f:
        for line in f:
            for word in line.lower().split():
                # Membership test kept deliberately: the original author's
                # timings showed it slightly faster than dict.get()
                # (0.069s vs 0.084s on a 640k text).
                if word in word_count:
                    word_count[word] += 1
                else:
                    word_count[word] = 1

    times.seg_stop('count words stop')
    return word_count