def predict(predicted, number):
    # Uses module-level state: the `trained` flag, the running digit string `code`,
    # and the n-gram models (unigram/bigram letters and words).
    if not trained:
        return
    global code
    predicted_so_far = predicted.get()
    if number == '0':
        # '0' appends a space, closing the current word.
        code += ' '
        predicted_so_far += ' '
    elif number == '':
        # No new digit: just re-run the prediction on the code entered so far.
        predicted_so_far = word_prediction.run(code, unigram_letters, bigram_letters,
                                               unigram_words, bigram_words)
    else:
        # Append the new digit and predict the most likely text for the code so far.
        code += number
        predicted_so_far = word_prediction.run(code, unigram_letters, bigram_letters,
                                               unigram_words, bigram_words)
    predicted.set(predicted_so_far)
def test():
    unigram_letters_filename = 'training/unigram_letter.json'
    bigram_letters_filename = 'training/bigram_letter.json'
    unigram_words_filename = 'training/unigram_words.json'
    bigram_words_filename = 'training/bigram_words.json'

    # Load the pre-trained letter and word n-gram models.
    with open(unigram_letters_filename) as data_file:
        unigram_letters = json.load(data_file)
    with open(bigram_letters_filename) as data_file:
        bigram_letters = json.load(data_file)
    with open(unigram_words_filename) as data_file:
        unigram_words = json.load(data_file)
    with open(bigram_words_filename) as data_file:
        bigram_words = json.load(data_file)

    filenames = ['texto_coloquial.txt', 'texto_culto.txt', 'texto_noticia.txt']
    for filename in filenames:
        # Build the test set for this corpus file and load the resulting sentences.
        create_set('corpus/' + filename)
        with open('corpus/test.json', 'r') as fp:
            sentences = json.load(fp)

        words_similarities = []
        sentences_similarities = []
        for sentence in sentences:
            # Encode the sentence as a digit sequence and predict the text back from it.
            code = functions.sentence_to_numbers(sentence).strip()
            prediction = word_prediction.run(code, unigram_letters, bigram_letters,
                                             unigram_words, bigram_words)

            # Sentence similarity
            vector_s_1 = functions.text_to_vector(sentence)
            vector_s_2 = functions.text_to_vector(prediction)
            sentences_similarities.append(functions.get_cosine(vector_s_1, vector_s_2))

            sentence_words = sentence.split()
            prediction_words = prediction.split()

            # Word similarity, compared word by word. Iterating over the shorter of the
            # two lists guards against an IndexError if the prediction drops a word.
            for i in range(min(len(sentence_words), len(prediction_words))):
                vector_w_1 = functions.word_to_vector(sentence_words[i])
                vector_w_2 = functions.word_to_vector(prediction_words[i])
                words_similarities.append(functions.get_cosine(vector_w_1, vector_w_2))

        words_avg = sum(words_similarities) / float(len(words_similarities))
        sentences_avg = sum(sentences_similarities) / float(len(sentences_similarities))
        print("Average similarity for {0}:\n\t- Sentences: {1:.2f}\n\t- Words: {2:.2f}".format(
            filename.replace('_', ' ').replace('.txt', ''), sentences_avg, words_avg))
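
# The helpers functions.text_to_vector, functions.word_to_vector and functions.get_cosine
# are defined elsewhere in the project and are not shown in this file. The definitions
# below are only a minimal sketch of what such count-vector cosine-similarity helpers
# could look like, assuming the vectors are collections.Counter objects; they are
# illustrative assumptions, not the project's actual implementation.

# from collections import Counter
# import math
# import re
#
#
# def text_to_vector(text):
#     # Hypothetical sketch: bag-of-words vector over the tokens of a sentence.
#     return Counter(re.findall(r'\w+', text))
#
#
# def word_to_vector(word):
#     # Hypothetical sketch: bag-of-characters vector for a single word.
#     return Counter(word)
#
#
# def get_cosine(vec1, vec2):
#     # Cosine similarity between two sparse count vectors (Counters).
#     intersection = set(vec1) & set(vec2)
#     numerator = sum(vec1[k] * vec2[k] for k in intersection)
#     denominator = (math.sqrt(sum(v * v for v in vec1.values())) *
#                    math.sqrt(sum(v * v for v in vec2.values())))
#     return numerator / denominator if denominator else 0.0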