# print("Common word added: ", key)
# if value <= rare_word_treshold:
#     rare_words.add(key)
#     print("Rare word: ", key, value)

# def correctness(frequency_treshold = 0.006):
#     return 100 - sum(read_in_test_data(frequency_treshold))

# Script entry: run create_dicts() once, then prompt the user.
create_dicts()

print("Press <Enter> to start correcting.")

# Every empty line read from stdin (a bare <Enter>) triggers one more call to
# read_in_test_data(); any non-empty input ends the loop.
while not input():
    # Imported inside the loop, as in the original, so `correct` is only
    # loaded once the user actually starts a run.
    from correct import read_in_test_data

    # NOTE(review): word_count is passed as both the first and the fourth
    # argument -- confirm against read_in_test_data's signature.
    read_in_test_data(
        word_count,
        word_frequency,
        following_word,
        word_count,
        common_words,
        rare_words,
    )

# print("Should return a positive number", count_seen_wordpair('en', 'rétt'))
# print(following_word['en'].get('sósíalismi'))
# frequency_treshold = 0.06
# while(correctness < 95):
#     print("Testing frequency treshold", frequency_treshold)
#     cre
# print "Should return True", check_real_word_error('horfinn', 'ef')
# print "Should return False", check_real_word_error('fresti', 'sem')
# print "Should return Vegna", best_guess_if_rwe('sæti', 'vega')
# NOTE(review): the statements down to the loop's end appear to be the tail of
# an enclosing function whose header is not visible here (they use `csvfile`,
# which is not defined in this span). Re-indent them under that header when
# splicing this back into the file.
reader = csv.DictReader(csvfile)
print("Creating dict from:", Parameters.training_data, file=sys.stderr)

prev_word = ""
for row in reader:
    word = row['CorrectWord']
    # Comma tokens are skipped entirely: not counted, not recorded.
    if word == ",":
        continue
    # NOTE(review): the collapsed source does not show where this `if` ends;
    # it is reconstructed as covering only the lower-casing -- confirm.
    if not prev_word:
        word = word.lower()
    Parameters.words.add(word)
    populate_following_word(prev_word, word)
    Parameters.word_count += 1
    prev_word = word


def populate_following_word(prev_word, cur_word):
    """Record one occurrence of ``cur_word`` directly after ``prev_word``.

    Updates ``Parameters.following_word``, a mapping from a word to a dict
    of ``{following word: occurrence count}``.
    """
    successors = Parameters.following_word.get(prev_word)
    if not successors:
        # First time prev_word is seen (or its entry is empty): start fresh.
        Parameters.following_word[prev_word] = {cur_word: 1}
    elif not successors.get(cur_word):
        # Known prev_word, but this pair has not been counted yet.
        successors[cur_word] = 1
    else:
        successors[cur_word] += 1


# Runs on import as well as on direct execution.
read_files()

if __name__ == '__main__':
    correct.read_in_test_data()