def main():
    """Print the running median of the number of words per input line.

    Input files are taken from the command line arguments and processed in
    alphabetical order; with no arguments, ``fileinput`` falls back to
    standard input. After each line is consumed, the current median is
    written to standard output with one decimal place.

    NOTE(review): ``tokenize.word_tokenize`` and ``median.RunningMedian`` are
    helpers defined outside this block; presumably the former yields the
    words of a line and the latter tracks the median of the values added so
    far -- confirm against their definitions.
    """
    # Read the list of files from the command line arguments.
    # Make sure they are ordered in alphabetical order.
    files = sorted(sys.argv[1:])
    file_input = fileinput.FileInput(files)
    running_median = median.RunningMedian()
    try:
        for line in file_input:
            # Materialize the tokens so that they can be counted with len().
            words = list(tokenize.word_tokenize(line))
            running_median.add(len(words))
            print('%.1f' % running_median.get_median())
    finally:
        # Always release the underlying file handle, even if tokenizing or
        # printing raises part-way through the input.
        file_input.close()
def main():
    """Emit a running median of per-line word counts for the given files.

    The file names come from ``sys.argv[1:]`` and are read in alphabetical
    order (``fileinput`` reads standard input when the list is empty). One
    median value, formatted as ``%.1f``, is printed for every line read.

    NOTE(review): ``tokenize.word_tokenize`` and ``median.RunningMedian``
    come from outside this block; their exact semantics should be verified
    against their own definitions.
    """
    # Read the list of files from the command line arguments.
    # Make sure they are ordered in alphabetical order.
    files = sorted(sys.argv[1:])
    running_median = median.RunningMedian()
    file_input = fileinput.FileInput(files)
    try:
        for line in file_input:
            # The token stream is turned into a list so its length is known.
            word_count = len(list(tokenize.word_tokenize(line)))
            running_median.add(word_count)
            print('%.1f' % running_median.get_median())
    finally:
        # Close the input explicitly so the open file is not leaked when an
        # exception interrupts the loop.
        file_input.close()
def main():
    """Print how often each word occurs across the input files.

    Input files are taken from the command line arguments and processed in
    alphabetical order; with no arguments, ``fileinput`` falls back to
    standard input. Once all input has been read, one ``word<TAB>count``
    line is printed per distinct word, in sorted word order.

    NOTE(review): ``tokenize.word_tokenize`` is a helper defined outside this
    block; it is assumed to yield the hashable word tokens of a line --
    confirm against its definition.
    """
    # Read the list of files from the command line arguments.
    # Make sure they are ordered in alphabetical order.
    files = sorted(sys.argv[1:])
    file_input = fileinput.FileInput(files)

    # Count frequencies using a Counter object based on Python's dict.
    # For a more memory efficient implementation we may use a Trie data
    # structure.
    word_counter = collections.Counter()
    try:
        for line in file_input:
            # Counter.update() adds one to the count of every yielded token.
            word_counter.update(tokenize.word_tokenize(line))
    finally:
        # Always release the underlying file handle, even if tokenizing
        # raises part-way through the input.
        file_input.close()

    for word in sorted(word_counter):
        print('%s\t%s' % (word, word_counter[word]))
def main():
    """Tally word frequencies over the given files and print them sorted.

    The file names come from ``sys.argv[1:]`` and are read in alphabetical
    order (``fileinput`` reads standard input when the list is empty). After
    the whole input is consumed, each distinct word is printed together with
    its count, separated by a tab, in sorted word order.

    NOTE(review): ``tokenize.word_tokenize`` comes from outside this block;
    it is assumed to yield the hashable word tokens of a line -- verify
    against its definition.
    """
    # Read the list of files from the command line arguments.
    # Make sure they are ordered in alphabetical order.
    files = sorted(sys.argv[1:])

    # Count frequencies using a Counter object based on Python's dict.
    # For a more memory efficient implementation we may use a Trie data
    # structure.
    word_counter = collections.Counter()
    file_input = fileinput.FileInput(files)
    try:
        for line in file_input:
            # Each token yielded for the line bumps its count by one.
            word_counter.update(tokenize.word_tokenize(line))
    finally:
        # Close the input explicitly so the open file is not leaked when an
        # exception interrupts the loop.
        file_input.close()

    # Iterating a Counter yields its keys, so .keys() is not needed here.
    for word in sorted(word_counter):
        print('%s\t%s' % (word, word_counter[word]))