Beispiel #1
0
def zipf_csv(txt, csv):
    '''Build a Zipf's law csv file from the words of a text file.

    txt: path and name of the text file (a book, say) to evaluate
    csv: path and name of the csv file to write out

    Steps: read the words of txt, tally how often each one occurs, sort the
    tallies into a ranking, add the rank data, and write the result to csv.
    Per the original notes the rows end up as log(f)/log(r)/word tuples,
    ready to be graphed.
    '''
    words = critique_book_13_2.file_to_word_list(txt)
    ranking = sort_val(sum_freq(words))
    # add_rank's return value is not used here; it is expected to update
    # ranking in place -- confirm against its definition.
    add_rank(ranking)
    out_file(ranking, csv)
def zipf_csv(txt, csv):
    '''Build a Zipf's law csv file from the words of a text file.

    txt: path and name of the text file (a book, say) to evaluate
    csv: path and name of the csv file to write out

    Steps: read the words of txt, tally how often each one occurs, sort the
    tallies into a ranking, add the rank data, and write the result to csv.
    Per the original notes the rows end up as log(f)/log(r)/word tuples,
    ready to be graphed.
    '''
    words = critique_book_13_2.file_to_word_list(txt)
    ranking = sort_val(sum_freq(words))
    # add_rank's return value is not used here; it is expected to update
    # ranking in place -- confirm against its definition.
    add_rank(ranking)
    out_file(ranking, csv)
#
# Modify the previous program to read a word list (see Section 9.1) and then
# print all the words in the book that are not in the word list. How many of
# them are typos? How many of them are common words that should be in the word
# list, and how many of them are really obscure?

import critique_book_13_2
import sys


# This is pretty much all of exercise 9.1
def read_word_list(path="words.txt"):
    """Return the lines of a word-list file as a list of stripped strings.

    path: name of the word-list file to read.  Defaults to "words.txt" in
          the current working directory, which is the file read when no
          argument is passed.

    Each line has its leading and trailing whitespace (including the
    newline) removed.  File order is preserved and blank lines are kept as
    empty strings.  Errors raised by open() (for example a missing file)
    propagate to the caller.
    """
    # The with-block closes the file even if reading fails part-way through.
    with open(path, "r") as fin:
        return [line.strip() for line in fin]


if __name__ == "__main__":
    # Print every word counted in the document named on the command line
    # that does not appear in the word list.
    if len(sys.argv) < 2:
        # Without this check a missing argument surfaces as a bare IndexError.
        sys.exit("usage: %s <text-file>" % sys.argv[0])
    word_list = read_word_list()
    # A set gives constant-time membership tests; scanning the whole list
    # once per document word gets slow for a large word list.
    known_words = set(word_list)
    doc_word_list = critique_book_13_2.file_to_word_list(sys.argv[1])
    census = critique_book_13_2.word_counter(doc_word_list)
    for word in census:
        if word not in known_words:
            # The parenthesised form prints the same text under Python 2
            # and also parses under Python 3.
            print(word)
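# Modify the previous program to read a word list (see Section 9.1) and then
# print all the words in the book that are not in the word list. How many of
# them are typos? How many of them are common words that should be in the word
# list, and how many of them are really obscure?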


import critique_book_13_2
import sys



# This is pretty much all of exercise 9.1
def read_word_list(path="words.txt"):
    """Return the lines of a word-list file as a list of stripped strings.

    path: name of the word-list file to read.  Defaults to "words.txt" in
          the current working directory, which is the file read when no
          argument is passed.

    Each line has its leading and trailing whitespace (including the
    newline) removed.  File order is preserved and blank lines are kept as
    empty strings.  Errors raised by open() (for example a missing file)
    propagate to the caller.
    """
    # The with-block closes the file even if reading fails part-way through.
    with open(path, "r") as fin:
        return [line.strip() for line in fin]

if __name__ == "__main__":
    # Print every word counted in the document named on the command line
    # that does not appear in the word list.
    if len(sys.argv) < 2:
        # Without this check a missing argument surfaces as a bare IndexError.
        sys.exit("usage: %s <text-file>" % sys.argv[0])
    word_list = read_word_list()
    # A set gives constant-time membership tests; scanning the whole list
    # once per document word gets slow for a large word list.
    known_words = set(word_list)
    doc_word_list = critique_book_13_2.file_to_word_list(sys.argv[1])
    census = critique_book_13_2.word_counter(doc_word_list)
    for word in census:
        if word not in known_words:
            # The parenthesised form prints the same text under Python 2
            # and also parses under Python 3.
            print(word)