def main():
    """Print a running median of per-line word counts for the given files.

    File names are read from the command line arguments and processed in
    alphabetical order.  For every input line, the number of tokens produced
    by ``tokenize.word_tokenize`` is fed to a ``median.RunningMedian`` and
    the value returned by ``get_median()`` is printed with one decimal place.

    NOTE(review): with no arguments ``fileinput`` falls back to reading
    standard input -- confirm that this is the intended behaviour.
    """
    # Sort the file names so the output does not depend on argument order.
    files = sorted(sys.argv[1:])
    file_input = fileinput.FileInput(files)

    running_median = median.RunningMedian()
    try:
        for line in file_input:
            # The tokenizer result is materialised only to count its items.
            words = list(tokenize.word_tokenize(line))
            running_median.add(len(words))
            print('%.1f' % running_median.get_median())
    finally:
        # Release the currently open file even if tokenizing, updating the
        # median or printing raises part-way through the input.
        file_input.close()
Ejemplo n.º 2
0
def main():
    """Print a running median of per-line word counts for the given files.

    File names are read from the command line arguments and processed in
    alphabetical order.  For every input line, the number of tokens produced
    by ``tokenize.word_tokenize`` is fed to a ``median.RunningMedian`` and
    the value returned by ``get_median()`` is printed with one decimal place.

    NOTE(review): with no arguments ``fileinput`` falls back to reading
    standard input -- confirm that this is the intended behaviour.
    """
    # Sort the file names so the output does not depend on argument order.
    files = sorted(sys.argv[1:])
    file_input = fileinput.FileInput(files)

    running_median = median.RunningMedian()
    try:
        for line in file_input:
            # The tokenizer result is materialised only to count its items.
            words = list(tokenize.word_tokenize(line))
            running_median.add(len(words))
            print('%.1f' % running_median.get_median())
    finally:
        # Release the currently open file even if tokenizing, updating the
        # median or printing raises part-way through the input.
        file_input.close()
Ejemplo n.º 3
0
def main():
    """Print how often each word occurs in the given files.

    File names are read from the command line arguments and processed in
    alphabetical order.  Every line is split with ``tokenize.word_tokenize``
    and the resulting words are tallied.  The output is one
    ``word<TAB>count`` line per distinct word, in sorted word order.

    NOTE(review): with no arguments ``fileinput`` falls back to reading
    standard input -- confirm that this is the intended behaviour.
    """
    # Sort the file names so the processing order is deterministic.
    files = sorted(sys.argv[1:])
    file_input = fileinput.FileInput(files)

    # Count frequencies using a Counter object based on Python's dict.
    # For a more memory efficient implementation a Trie could be used.
    word_counter = collections.Counter()
    try:
        for line in file_input:
            # Counter.update() adds one per element of the iterable, which
            # matches incrementing the count for each word individually.
            word_counter.update(tokenize.word_tokenize(line))
    finally:
        # Release the currently open file even if tokenizing raises.
        file_input.close()

    # Iterating a Counter yields its keys, so no .keys() call is needed.
    for word in sorted(word_counter):
        print('%s\t%s' % (word, word_counter[word]))
def main():
    """Print how often each word occurs in the given files.

    File names are read from the command line arguments and processed in
    alphabetical order.  Every line is split with ``tokenize.word_tokenize``
    and the resulting words are tallied.  The output is one
    ``word<TAB>count`` line per distinct word, in sorted word order.

    NOTE(review): with no arguments ``fileinput`` falls back to reading
    standard input -- confirm that this is the intended behaviour.
    """
    # Sort the file names so the processing order is deterministic.
    files = sorted(sys.argv[1:])
    file_input = fileinput.FileInput(files)

    # Count frequencies using a Counter object based on Python's dict.
    # For a more memory efficient implementation a Trie could be used.
    word_counter = collections.Counter()
    try:
        for line in file_input:
            # Counter.update() adds one per element of the iterable, which
            # matches incrementing the count for each word individually.
            word_counter.update(tokenize.word_tokenize(line))
    finally:
        # Release the currently open file even if tokenizing raises.
        file_input.close()

    # Iterating a Counter yields its keys, so no .keys() call is needed.
    for word in sorted(word_counter):
        print('%s\t%s' % (word, word_counter[word]))