def _c_stopwords(n, dict_path, output_path):
    """
    Find the words common to every n-character word list and write them out
    as stop words to ``output_path + '/stopwords_<n>.txt'``.

    :param n: word length (number of characters); used to index each entry
        of ``dict_path`` and to name the output file
    :param dict_path: mapping whose keys are source directories and whose
        values are indexable by ``n`` to give a file name in that directory
    :param output_path: directory the result file is written to
    :return: None
    :raises IndexError: if ``dict_path`` is empty
    """
    directories = list(dict_path.keys())

    # Seed the running intersection with the first word list's index.
    first_dir = directories[0]
    first_list = WordList.WordList()
    first_list.read_data(first_dir + '/' + dict_path[first_dir][n])
    words = first_list.get_data().index

    for directory in directories[1:]:
        path = directory + '/' + dict_path[directory][n]
        print(path)
        other_list = WordList.WordList()
        other_list.read_data(path)
        # Intersect with the words kept so far. The previous code intersected
        # the freshly loaded list with itself, which discarded all earlier
        # lists and left ``words`` undefined for a single directory.
        words = words.intersection(other_list.get_data().index)

    df = pd.DataFrame({'word_segment': list(words)})
    output_file = output_path + '/stopwords_' + str(n) + '.txt'
    df.to_csv(output_file, index=False)
    print(df)
Example #2
0
def traverse(t):
    """Collect adjectives per noun from the direct subtrees of ``t``.

    If ``t`` has no ``label()`` method it is printed followed by a space and
    nothing else happens. Otherwise every child whose type is exactly
    ``tree.Tree`` is scanned: an "NN" item updates the module-level ``noun``
    (via ``wordlist.getNoun``, falling back to "misc" when no noun has been
    set yet), and a "JJ" item is appended to ``ratings[noun]``.
    """
    global noun

    try:
        t.label()
    except AttributeError:
        print(t, end=" ")
        return

    for child in t:
        if type(child) != tree.Tree:
            continue

        # Each subtree starts with no current noun.
        noun = ""
        for words in child:
            tag = words[1]
            if tag == "NN":
                candidate = wordlist.getNoun(words[0])
                if candidate != "":
                    noun = candidate
                elif noun == "":
                    noun = "misc"
            elif tag == "JJ":
                if noun in ratings:
                    adjectives = ratings[noun]
                    adjectives.append(words[0])
                    ratings[noun] = adjectives
                else:
                    ratings[noun] = [words[0]]
Example #3
0
count for s: 4095 because of ^[^s][^s][^s][^s][^s][^s][^s]$

count for t: 5528 because of ^[^t][^t][^t][^t][^t][^t][^t]$
count for u: 6782 because of ^[^u][^u][^u][^u][^u][^u][^u][^u]$
count for v: 8428 because of ^[^v][^v][^v][^v][^v][^v][^v][^v]$
count for w: 8347 because of ^[^w][^w][^w][^w][^w][^w][^w][^w]$
count for x: 8953 because of ^[^x][^x][^x][^x][^x][^x][^x][^x]$
count for y: 8132 because of ^[^y][^y][^y][^y][^y][^y][^y][^y]$
count for z: 9000 because of ^[^z][^z][^z][^z][^z][^z][^z][^z]$

'''

import WordList
import re

# Module-level WordList instance, created at import time; worst_case_for_one
# reads its ``lines`` attribute.
wl = WordList.WordList()


def worst_case():
    """Return ``worst_case_for_one`` evaluated for the regex ``'.'``."""
    any_character = '.'
    return worst_case_for_one(any_character)


def worst_case_for_one(regex):
    """Begin a worst-case search over the words that match ``regex``.

    NOTE(review): only the start of this function is visible here; the body
    of the final ``for`` loop and the return value are not shown, so the
    overall contract cannot be confirmed from this excerpt.
    """
    # Worst case for the first guess.

    # Keep only the entries of the module-level word list that ``regex`` matches.
    words = [word for word in wl.lines if re.search(regex, word) != None]

    # Best candidate seen so far; both start at their "nothing found" values.
    best_regex = None
    best_count = 0

    # Walk the character codes for 't' through 'z' inclusive.
    for c in range(ord('t'), ord('z') + 1):
Example #4
0
'''
This is the actual executioner that gets run.
'''


import WordList
import random
import math

# Module-level WordList instance, created once at import time.
wordlist = WordList.WordList()

class Executioner:
	"""Executioner whose constructor loads the hardest-words table and
	initialises the bookkeeping counters shown below."""

	# Initialize an Executioner, ready to play. Please do not change anything already 
	# in this method, but feel free to add more stuff.
	def __init__(self):
		self.iterator = 0

		# Load "word count" pairs from alltime_hardest_words.txt. The file is
		# opened in a ``with`` block so the handle is closed deterministically;
		# blank lines are skipped instead of raising IndexError. As before,
		# a later duplicate word overrides an earlier one.
		self.alltime_hardest_words = {}
		with open("alltime_hardest_words.txt", "r") as hardest_file:
			for line in hardest_file:
				fields = line.split()
				if not fields:
					continue
				self.alltime_hardest_words[fields[0]] = int(fields[1])

		self.random_games = 0
		self.max_random_games = 30
		self.win_count = 0
		self.lose_count = 0.001 # Avoid division by zero.
		self.new_win_or_loss_weight = 1.0 / 20
		self.average_ratio = 0
		self.tries_below_average = 0
		self.tries_before_change = 10
		self.choose_word_count = 2048
		# One [16**i, 5, 5.0] triple for i = 1, 2, 3.
		self.choose_word_counts = [ [16**i, 5, 5.0] for i in range(1, 4) ]
		self.select_count_each = 0
Example #5
0
def indexing(index_dict: dict, doc_list: ["Document"]) -> None:
    """Feed every document into ``index_dict`` and then compute tf-idf.

    Each element of ``doc_list`` is passed to ``WordList.update_index_dict``
    together with ``index_dict``; afterwards ``WordList.calculate_tfidf`` is
    called with ``index_dict`` and the number of documents.
    """
    # Taken before the loop, exactly as the document count is needed later.
    document_count = len(doc_list)
    for document in doc_list:
        WordList.update_index_dict(index_dict, document)
    WordList.calculate_tfidf(index_dict, document_count)
Example #6
0
def get_word():
    """Return ``wl.get_random_word()`` converted to upper case."""
    return wl.get_random_word().upper()
Example #7
0
def plot_times(filename="English.txt", start=500, stop=5500, step=500):
    """Benchmark build and search times of a LinkedList, a BST and an AVL.

    For every n in ``range(start, stop, step)`` (``stop`` itself is excluded),
    the first n words of a shuffled word list are loaded into each
    structure and the load is timed. Five random words from that same slice
    are then looked up, and the mean lookup time is recorded. The structure
    is cleared before the next n.

    One figure with two subplots is shown: number of words against build
    time (left) and number of words against average search time (right).

    Inputs:
        filename (str): the file to use in creating the data sets.
        start (int): the lower bound on the sample interval.
        stop (int): the (exclusive) upper bound on the sample interval.
        step (int): the space between points in the sample interval.

    Returns:
        Show the plot, but do not return any values.
    """

    def timed(func, *args):
        """Return the seconds taken by one call of ``func(*args)``."""
        return timeit.timeit(lambda: func(*args), number=1)

    def add_all(container, words):
        """Load ``words`` through the container's ``add`` method."""
        for word in words:
            container.add(word)

    def insert_all(container, words):
        """Load ``words`` through the container's ``insert`` method."""
        for word in words:
            container.insert(word)

    def average_find_time(container, words):
        """Return the mean time of looking up 5 random entries of ``words``."""
        samples = []
        for _ in range(5):
            target = random.choice(words)
            samples.append(timed(container.find, target))
        return np.mean(samples)

    def benchmark(container, load):
        """Return (build_times, search_times) for ``container`` over all sizes."""
        build_times = []
        search_times = []
        for size in x_values:
            sample = word_list[:int(size)]
            build_times.append(timed(load, container, sample))
            search_times.append(average_find_time(container, sample))
            # Start the next size from an empty structure.
            container.clear()
        return build_times, search_times

    # NOTE(review): ``filename`` is not forwarded here, so the module's default
    # data set is always used -- confirm create_word_list's signature before
    # passing it through.
    word_list = WordList.create_word_list()
    word_list = np.random.permutation(word_list)
    x_values = range(start, stop, step)

    # Same order as before: linked list first, then BST, then AVL.
    list_times, find_list = benchmark(LinkedList(), add_all)
    bst_times, find_bst = benchmark(BST(), insert_all)
    avl_times, find_avl = benchmark(AVL(), insert_all)

    panels = (
        (121, 'Build Times', (list_times, bst_times, avl_times)),
        (122, 'Search Times', (find_list, find_bst, find_avl)),
    )
    labels = ('Linked List', 'BST', 'AVL')
    for position, title, series in panels:
        plt.subplot(position)
        for label, times in zip(labels, series):
            plt.plot(x_values, times, label=label)
        plt.legend(loc='upper left')
        plt.title(title)
        plt.xlabel('data points')
        plt.ylabel('seconds')

    # The old trailing plt.xlabel() call after show() is gone: once the figure
    # had been shown it only created a new, empty figure.
    plt.show()
Example #8
0
def play_hangman_both():
    """Play hangman interactively, alternating who guesses, until '-q'.

    Each pass of the main loop has two rounds:

    1. The human supplies a word and the computer ``Guesser`` tries to solve
       it. Unknown words may be added to the dictionary.
    2. The ``Executioner`` picks a word and the human guesses one lowercase
       letter at a time.

    After each round one line is appended to ``results.txt`` recording who
    guessed, the word (padded to 14 characters) and who won, and the running
    score is printed. Typing ``-q`` at any prompt calls ``end_game`` and
    returns. The results file is closed on every exit path.

    :return: None
    """

    def prompt(message):
        """Read one line from the console and strip surrounding whitespace."""
        return raw_input(message).strip()

    def result_line(guesser_name, secret, winner):
        """Format one results.txt row; columns are padded to 8 and 14 chars."""
        return (guesser_name.ljust(8) + " | " + secret.ljust(14) +
                " | " + winner + "\n")

    guesser = Guesser.Guesser()
    executioner = Executioner.Executioner()
    wins = 0
    losses = 0
    myfile = open('results.txt', 'a')
    # The loop below only ever exits through ``return`` (or an exception), so
    # the file must be closed in ``finally``; a close() placed after the loop
    # would never run.
    try:
        while True:
            # ---- Round 1: the computer guesses the human's word. ----
            word = prompt('What is your word? ')
            if word == '-q':
                end_game(guesser, executioner)
                return None

            while len(Guesser.wordlist.find_words(word)) == 0:
                addp = prompt(
                    'That is not a word. ' +
                    'Do you want to add it to the dictionary? (y/n) ')
                if addp == 'y':
                    print('Adding...')
                    Guesser.wordlist.add_word(word)
                    # Rebuild both modules' word lists after the addition.
                    Guesser.wordlist = WordList.WordList()
                    Executioner.wordlist = WordList.WordList()
                    print('added.')
                # The word is asked for again whether or not it was added.
                word = prompt('What is your word? ')
                if word == '-q':
                    end_game(guesser, executioner)
                    return None

            executioner.reset(word)
            guesser.solve(executioner)
            if executioner.did_guesser_win:
                # The computer was guessing, so its win is the human's loss.
                losses += 1
                myfile.write(
                    result_line("Computer", executioner._word, "computer"))
            else:
                myfile.write(
                    result_line("Computer", executioner._word, "human"))
                wins += 1
            print('You have %s wins and %s losses' % (wins, losses))
            print('')

            # ---- Round 2: the human guesses the executioner's word. ----
            executioner.reset()
            incorrect = ''
            while executioner.is_playing:
                print("word: %s" % (executioner.word,))
                print("guessed letters: %s" % (incorrect,))
                print("body parts remaining: %s"
                      % (executioner.body_parts - executioner.man,))
                while True:
                    letter = prompt('What is your guess? ')
                    if letter == '-q':
                        end_game(guesser, executioner)
                        return None
                    if len(letter) > 1:
                        print("That is more than just one letter. "
                              "Please try again.")
                    elif len(letter) == 0:
                        print("You didn't enter anything. Please try again.")
                    elif not 'a' <= letter <= 'z':
                        print("You must enter a lowercase letter. "
                              "Please try again.")
                    else:
                        break
                # A non-None result from guess() ends the round.
                if executioner.guess(letter) is not None:
                    break
                if (letter not in executioner.not_incorrect
                        and letter not in incorrect):
                    incorrect += letter
                print('')

            if executioner.did_guesser_win:
                wins += 1
                myfile.write(
                    result_line("Human", executioner._word, "human"))
            else:
                myfile.write(
                    result_line("Human", executioner._word, "computer"))
                losses += 1
            print('You have %s wins and %s losses' % (wins, losses))
            print('')
    finally:
        myfile.close()
 def test_adding_word(self):
     # A word passed to add() must end up as the last entry of ``list``.
     words = WordList()
     words.add('ha')
     last_entry = words.list[-1]
     self.assertEqual(last_entry, 'ha')
 def test_generate_swear_word_dict(self):
     # NOTE(review): this only constructs a WordList and prints it; nothing
     # is asserted, so it passes whenever construction and printing succeed.
     print(WordList())