Esempio n. 1
0
def predict(predicted, number):
	"""Update the displayed prediction in response to one key press.

	Args:
		predicted: Object exposing ``get()`` and ``set()`` for the text shown
			to the user (presumably a Tkinter ``StringVar`` -- confirm
			against the caller).
		number: The pressed key as a string. ``'0'`` appends a space to both
			the accumulated code and the displayed text; ``''`` re-runs the
			prediction on the current code; any other value is appended to
			the code before predicting.

	Returns ``None`` without touching anything while the module-level
	``trained`` flag is falsy.
	"""
	global code

	if not trained:
		return

	shown_text = predicted.get()

	if number == '0':
		# Word separator: extend the typed code and the visible text alike.
		code += ' '
		shown_text += ' '
	else:
		# An empty key leaves the code untouched and only re-predicts.
		if number != '':
			code += number
		shown_text = word_prediction.run(code, unigram_letters, bigram_letters, unigram_words, bigram_words)

	predicted.set(shown_text)
Esempio n. 2
0
def predict(predicted, number):
    """Refresh the prediction text after a key press.

    Args:
        predicted: Holder of the text currently shown; only its ``get()`` and
            ``set()`` methods are used (presumably a Tkinter ``StringVar`` --
            confirm against the caller).
        number: Key that was pressed, as a string. ``"0"`` adds a space to the
            module-level ``code`` and to the shown text. ``""`` predicts again
            from the unchanged code. Anything else is appended to ``code``
            and a new prediction replaces the shown text.

    The function is a no-op until the module-level ``trained`` flag is truthy.
    """
    global code

    if not trained:
        return

    current_text = predicted.get()

    # The space key never triggers a new prediction: it only extends both
    # the accumulated code and what is already displayed.
    if number == "0":
        code += " "
        predicted.set(current_text + " ")
        return

    # Every other key ends in a fresh prediction; only a non-empty key
    # contributes to the code first.
    if number != "":
        code += number

    predicted.set(
        word_prediction.run(code, unigram_letters, bigram_letters, unigram_words, bigram_words)
    )
Esempio n. 3
0
def _load_json(path):
	"""Return the parsed contents of the JSON file at *path*."""
	with open(path) as data_file:
		return json.load(data_file)


def _mean(values):
	"""Return the arithmetic mean of *values*, or 0.0 when it is empty."""
	if not values:
		return 0.0
	return sum(values) / float(len(values))


def test():
	"""Evaluate the word predictor on each corpus file and print the results.

	Loads the four n-gram tables from ``training/``, then for every corpus
	file calls ``create_set`` on it, reads the sentences from
	``corpus/test.json`` (presumably regenerated by ``create_set`` -- confirm),
	runs ``word_prediction.run`` on the numeric code of each sentence and
	accumulates cosine similarities at sentence and word level. The averages
	for each corpus file are printed; nothing is returned.
	"""
	unigram_letters = _load_json('training/unigram_letter.json')
	bigram_letters = _load_json('training/bigram_letter.json')
	unigram_words = _load_json('training/unigram_words.json')
	bigram_words = _load_json('training/bigram_words.json')

	filenames = ['texto_coloquial.txt', 'texto_culto.txt', 'texto_noticia.txt']

	for filename in filenames:
		create_set('corpus/' + filename)
		sentences = _load_json('corpus/test.json')

		words_similarities = []
		sentences_similarities = []

		for sentence in sentences:
			sentence_code = functions.sentence_to_numbers(sentence).strip()
			prediction = word_prediction.run(sentence_code, unigram_letters, bigram_letters, unigram_words, bigram_words)

			# Sentence similarity
			sentences_similarities.append(functions.get_cosine(
				functions.text_to_vector(sentence),
				functions.text_to_vector(prediction)))

			sentence_words = sentence.split()
			prediction_words = prediction.split()

			# Word similarity, pairing words by position. zip() stops at the
			# shorter list, so a prediction with fewer words than the sentence
			# no longer raises IndexError.
			for expected_word, predicted_word in zip(sentence_words, prediction_words):
				words_similarities.append(functions.get_cosine(
					functions.word_to_vector(expected_word),
					functions.word_to_vector(predicted_word)))

			# Sentence words that received no predicted counterpart are scored
			# as 0.0 so that a short prediction cannot inflate the average.
			missing = len(sentence_words) - len(prediction_words)
			if missing > 0:
				words_similarities.extend([0.0] * missing)

		# _mean() yields 0.0 for an empty test set instead of dividing by zero.
		words_avg = _mean(words_similarities)
		sentences_avg = _mean(sentences_similarities)

		print("Similaridad media en el {0}:\n\t-Frases: {1:.2f}\n\t-Palabras: {2:.2f}".format(filename.replace('_', ' ').replace('.txt', ''), sentences_avg, words_avg))