# NOTE: this section assumes the surrounding module defines (or imports)
# read_vocabulary, tokenize_files, RNNExtended, SkipGram and the constants
# MAX_VOCAB_SIZE, HIDDEN_LAYER_SIZE, WINDOW_SIZE, NUM_ITER, MAX_SENTENCES.
import itertools
from timeit import default_timer as timer


def testRNN(vocabulary_file, training_dir):
    print("Reading vocabulary " + vocabulary_file + "...")
    words, dictionary = read_vocabulary(vocabulary_file, MAX_VOCAB_SIZE)

    print("Reading sentences and training RNN...")
    start = timer()
    rnn = RNNExtended(len(words), HIDDEN_LAYER_SIZE)
    num_words = 0
    for i in range(NUM_ITER):
        sentences = tokenize_files(dictionary, training_dir)
        for sentence in itertools.islice(sentences, MAX_SENTENCES):
            # TODO: create a context window for each sentence?
            rnn.train(sentence)
            num_words += len(sentence)
        print("Iteration " + str(i + 1) + "/" + str(NUM_ITER) +
              " finished (" + str(num_words) + " words)")
        num_words = 0
    print("- Took %.2f sec" % (timer() - start))
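# A minimal sketch of the context windowing hinted at by the TODO above: it
# yields a fixed-size slice of word indices around each position of a
# sentence, clipped at the sentence boundaries. The helper name
# (context_windows) and its exact behaviour are assumptions for illustration,
# not part of the original code.
def context_windows(sentence, window_size):
    """Yield a window of up to 2 * window_size + 1 indices per position."""
    for pos in range(len(sentence)):
        lo = max(0, pos - window_size)
        hi = min(len(sentence), pos + window_size + 1)
        yield sentence[lo:hi]

# Hypothetical usage inside the training loop:
#     for window in context_windows(sentence, WINDOW_SIZE):
#         rnn.train(window)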
def testSkipGram(vocabulary_file, training_dir):
    print("Reading vocabulary " + vocabulary_file + "...")
    words, dictionary = read_vocabulary(vocabulary_file, MAX_VOCAB_SIZE)

    print("Reading sentences and training SkipGram...")
    start = timer()
    skip_gram = SkipGram(len(words), WINDOW_SIZE, HIDDEN_LAYER_SIZE)
    num_words = 0
    last_sentence = None
    for i in range(NUM_ITER):
        sentences = tokenize_files(dictionary, training_dir)
        for sentence in itertools.islice(sentences, MAX_SENTENCES):
            last_sentence = sentence
            skip_gram.train(sentence)
            num_words += len(sentence)
        # Re-feed the last trained sentence to get a log-likelihood estimate
        # (assumes the training directory yields at least one sentence).
        ll = skip_gram.train(last_sentence, compute_ll=True)
        print("Iteration " + str(i + 1) + "/" + str(NUM_ITER) +
              " finished (" + str(num_words) + " words)")
        print("Log-likelihood: " + str(ll))
        num_words = 0
    print("- Took %.2f sec" % (timer() - start))
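# A minimal sketch of how these smoke tests might be invoked from the command
# line. The default paths ("vocab.txt", "data/") are assumptions for
# illustration, not part of the original code.
if __name__ == "__main__":
    import sys

    vocabulary_file = sys.argv[1] if len(sys.argv) > 1 else "vocab.txt"
    training_dir = sys.argv[2] if len(sys.argv) > 2 else "data/"
    testRNN(vocabulary_file, training_dir)
    testSkipGram(vocabulary_file, training_dir)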