Code example #1
File: run_songs.py  Project: abrazinskas/Theano-RNN

import itertools

# RNNTheano, load_songs and the upper-case constants come from the project
rnn = RNNTheano(VOCAB_SIZE + SPEC_SYMBOLS_COUNT, hidden_dim=HIDDEN_LAYER_SIZE)
train_loss = []
test_loss = []
if PRELOAD_WEIGHTS:
    print("preloading weights")
    rnn.preload_weights(weights_file)
else:
    print("training the model")
    for e in range(EPOCHS):
        print("--- Epoch " + str(e + 1) + " ---")
        # loss on (a slice of) the train and test sets, recorded at the
        # start of each epoch
        train_loss.append(rnn.total_loss(itertools.islice(
            load_songs(train_file, word_to_index), MAX_L_SENTENCES)))
        test_loss.append(rnn.total_loss(itertools.islice(
            load_songs(test_file, word_to_index), MAX_L_SENTENCES)))
        sentences = load_songs(train_file, word_to_index)
        for i, sentence in enumerate(itertools.islice(sentences, MAX_SENTENCES), 1):
            # input starts with the start token; the target is the same
            # sequence shifted left by one, ending with the end token
            sentence.insert(0, word_to_index[sentence_start_token])
            y = sentence[1:] + [word_to_index[sentence_end_token]]
            rnn.train(sentence, y, ALPHA)
            if i % 10 == 0:
                print("processed " + str(i))

# saving weights
rnn.save_weights(weights_file)
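Both projects build each training pair the same way: the input is the sentence with the start-of-sentence id prepended, and the target is that sequence shifted left by one with the end-of-sentence id appended (next-token prediction with teacher forcing). A minimal self-contained sketch of just this step; the tiny vocabulary below is hypothetical and stands in for the project's word_to_index:

# hypothetical toy vocabulary standing in for the project's word_to_index
word_to_index = {"SENTENCE_START": 0, "SENTENCE_END": 1, "the": 2, "cat": 3}
sentence = [word_to_index["the"], word_to_index["cat"]]

x = [word_to_index["SENTENCE_START"]] + sentence   # input:  [0, 2, 3]
y = x[1:] + [word_to_index["SENTENCE_END"]]        # target: [2, 3, 1]

# at each step t the model learns to predict y[t] after seeing x[0..t]
assert len(x) == len(y)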
Code example #2
File: run.py  Project: ixlan/Theano-RNN
import itertools

# read the vocabulary
index_to_word, word_to_index = read_vocabulary(vocab_file, 8000)
# add the special symbols, deriving each id from its list position so that
# index_to_word and word_to_index stay consistent
index_to_word.append(sentence_start_token)
word_to_index[sentence_start_token] = len(index_to_word) - 1
index_to_word.append(sentence_end_token)
word_to_index[sentence_end_token] = len(index_to_word) - 1

if THEANO:
    rnn = RNNTheano(VOCAB_SIZE + SPEC_SYMBOLS_COUNT, hidden_dim=50)
else:
    rnn = RNN(VOCAB_SIZE + SPEC_SYMBOLS_COUNT,
              VOCAB_SIZE + SPEC_SYMBOLS_COUNT, hidden_dim=100)

print("training the model")
# loss on a slice of the training set, recorded at the start of each epoch
loss = [rnn.total_loss(itertools.islice(
    tokenize_file(word_to_index, train_file), MAX_L_SENTENCES))]
for e in range(EPOCHS):
    print("--- Epoch " + str(e + 1) + " ---")
    loss.append(rnn.total_loss(itertools.islice(
        tokenize_file(word_to_index, train_file), MAX_L_SENTENCES)))
    sentences = tokenize_file(word_to_index, train_file)
    for i, sentence in enumerate(itertools.islice(sentences, MAX_SENTENCES), 1):
        # input starts with the start token; target is shifted left by one
        sentence.insert(0, word_to_index[sentence_start_token])
        y = sentence[1:] + [word_to_index[sentence_end_token]]
        rnn.train(sentence, y, ALPHA)
        if i % 10 == 0:
            print("processed " + str(i))
Code example #3
import copy
import itertools

# index_to_word / word_to_index come from the vocabulary setup (as in
# example #2); derive each special-symbol id from its list position so
# the two mappings agree
index_to_word.append(sentence_start_token)
word_to_index[sentence_start_token] = len(index_to_word) - 1
index_to_word.append(sentence_end_token)
word_to_index[sentence_end_token] = len(index_to_word) - 1

if THEANO:
    rnn = RNNTheano(VOCAB_SIZE + SPEC_SYMBOLS_COUNT, hidden_dim=50)
else:
    rnn = RNN(VOCAB_SIZE + SPEC_SYMBOLS_COUNT,
              VOCAB_SIZE + SPEC_SYMBOLS_COUNT,
              hidden_dim=100)
print("training the model")
loss = [
    rnn.total_loss(
        itertools.islice(tokenize_file(word_to_index, train_file),
                         MAX_L_SENTENCES))
]
for e in range(EPOCHS):
    i = 0
    print("--- Epoch " + str(e + 1) + " ---")
    loss.append(
        rnn.total_loss(
            itertools.islice(tokenize_file(word_to_index, train_file),
                             MAX_L_SENTENCES)))
    sentences = tokenize_file(word_to_index, train_file)
    for sentence in itertools.islice(sentences, MAX_SENTENCES):
        i += 1
        sentence.insert(0, word_to_index[sentence_start_token])
        # target: the input shifted left by one, ending with the end token
        y = copy.copy(sentence)
        y.pop(0)
        y.append(word_to_index[sentence_end_token])
        rnn.train(sentence, y, ALPHA)
        if i % 10 == 0:
            print("processed " + str(i))