def predict_sentence(input_sentence, model_file):
    text = source_to_seq(input_sentence)
    batch_size = 2
    tf.reset_default_graph()
    input_data, targets, lr, target_sequence_length, max_target_sequence_length, source_sequence_length = get_model_inputs(batch_size)
    end_points = seq2seq_model(input_data, targets, lr, target_sequence_length,
                               max_target_sequence_length,
                               source_sequence_length, question_word_to_int,
                               answer_word_to_int, encoding_embedding_size,
                               decoding_embedding_size, rnn_size, num_layers,
                               batch_size, train=False)
    with tf.Session() as sess:
        # Load the saved model
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, model_file)
        # Tile the single input by batch_size to match the model's input shape
        answer_logits = sess.run(
            end_points['inference_logits'], {
                input_data: [text] * batch_size,
                target_sequence_length: [len(text)] * batch_size,
                source_sequence_length: [len(text)] * batch_size
            })[0]
    # answer_logits holds ids from the answer vocabulary, so use its <PAD> id
    pad = answer_word_to_int["<PAD>"]
    print('Original Text:', input_sentence)
    print('\nSource')
    print('  Word Ids:    {}'.format([i for i in text]))
    print('  Input Words: {}'.format(" ".join(
        [question_int_to_word[i] for i in text])))
    print('\nTarget')
    print('  Word Ids:       {}'.format([i for i in answer_logits if i != pad]))
    print('  Response Words: {}'.format(" ".join(
        [answer_int_to_word[i] for i in answer_logits if i != pad])))
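# Example call (a sketch, not from the original source; the checkpoint path is
# hypothetical and must point at weights saved by the training run):
#
#     predict_sentence('how are you', './checkpoints/seq2seq_model.ckpt')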
def prepare_session(vocabulary_size):
    logger.info('Preparing Tensorflow session')
    # Define a session
    tf.reset_default_graph()
    session = tf.InteractiveSession()
    # Load the model inputs
    inputs, targets, lr, keep_prob = model_input()
    # Set the sequence length
    sequence_length = tf.placeholder_with_default(max_question_size, None,
                                                  name='sequence_length')
    # Get the shape of the input tensor
    input_shape = tf.shape(inputs)
    logger.info('Initializing Seq2Seq model')
    # Get the training and test predictions
    training_predictions, test_predictions = seq2seq_model(
        tf.reverse(inputs, [-1]), targets, keep_prob, batch_size,
        sequence_length, vocabulary_size, vocabulary_size,
        encoding_embedding_size, decoding_embedding_size, rnn_size,
        num_of_layers, sos_id, eos_id)
    # Set up the loss, the optimizer and gradient clipping
    with tf.name_scope("optimization"):
        loss_error = seq2seq.sequence_loss(
            training_predictions, targets,
            tf.ones([input_shape[0], sequence_length]))
        # Optimizer
        _optimizer = tf.train.AdamOptimizer(param_learning_rate)
        # Gradient clipping
        gradients = _optimizer.compute_gradients(loss_error)
        clipped_gradients = [(tf.clip_by_value(grad, -5., 5.), var)
                             for grad, var in gradients if grad is not None]
        optimizer = _optimizer.apply_gradients(clipped_gradients)
    logger.info('Tensorflow session is ready')
    return (session, optimizer, loss_error, inputs, targets, lr, keep_prob,
            sequence_length, test_predictions)
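# Example call (a sketch, not from the original source; `word_to_int` stands
# in for whatever vocabulary mapping the preprocessing step produced):
#
#     session, optimizer, loss_error, inputs, targets, lr, keep_prob, \
#         sequence_length, test_predictions = prepare_session(len(word_to_int))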
import json
import sys
import os

# In[ ]:

import re
from collections import Counter

import numpy as np  # needed for np.load below
from keras.utils import to_categorical
from keras.callbacks import Callback

from config import *
from seq2seq_model import seq2seq_model

# In[ ]:

model, encoder_model, decoder_model, inf_model = seq2seq_model()

train_id_list = open('hw2_1_data/training_data/id.txt').read().split()
train_data = {
    i: np.load('hw2_1_data/training_data/feat/' + i + '.npy')
    for i in train_id_list
}
train_label = json.loads(open('hw2_1_data/training_label.json', 'r').read())

test_id_list = open('hw2_1_data/testing_data/id.txt').read().split()
test_data = {
    i: np.load('hw2_1_data/testing_data/feat/' + i + '.npy')
    for i in test_id_list
}
test_label = json.loads(open('hw2_1_data/testing_label.json', 'r').read())

# In[ ]:
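# The Counter/re imports above suggest the next cell builds the caption
# vocabulary. A sketch of that step, assuming training_label.json holds a list
# of {'id': ..., 'caption': [...]} entries (the names and the min-count
# threshold below are illustrative, not from the original):
word_counts = Counter()
for entry in train_label:
    for caption in entry['caption']:
        word_counts.update(re.findall(r"[a-z']+", caption.lower()))
vocab = ['<PAD>', '<BOS>', '<EOS>', '<UNK>'] + \
        [w for w, c in word_counts.most_common() if c >= 3]
word_to_idx = {w: i for i, w in enumerate(vocab)}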
def main():
    # hyperparameters
    feature_size = 512 * 7 * 7
    learning_rate = 0.0001
    num_epochs = 100
    batch_size = 6

    # create the save and log directories
    print("Creating the save and log directories")
    if not os.path.exists("./save"):
        os.makedirs("./save")
    if not os.path.exists("./logfile"):
        os.makedirs("./logfile")
    if not os.path.exists("./logfile/s2s"):
        os.makedirs("./logfile/s2s")

    # load the datasets
    train_dataset = seq2seq_Dataset.seq2seq_Dataset(mode="train")
    test_dataset = seq2seq_Dataset.seq2seq_Dataset(mode="valid")
    print('train_dataset size: %d' % len(train_dataset.data))
    print('valid_dataset size: %d' % len(test_dataset.data))
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8,
                              collate_fn=my_collate)
    test_loader = DataLoader(test_dataset,
                             batch_size=1,
                             shuffle=True,
                             num_workers=8,
                             collate_fn=my_collate)
    print('train_loader size: %d' % len(train_loader))
    print('test_loader size: %d' % len(test_loader))

    model = seq2seq_model.seq2seq_model(feature_size)

    # enable GPU if available
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Device used:', device)
    if use_cuda:
        model = model.to(device)

    # set up the optimizer and resume from a checkpoint
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           betas=(0.5, 0.999))
    load_checkpoint('./save/RNN-043.pth', model, optimizer)
    criterion = nn.CrossEntropyLoss()

    train_loss_list = []
    train_acc_list = []
    test_loss_list = []
    test_acc_list = []

    print("Starting training...")
    best_accuracy = -np.inf
    for epoch in range(num_epochs):
        model.train()
        print("Epoch:", epoch + 1)
        epoch_train_loss = 0.0
        train_acc = 0.0
        # halve the learning rate once at epoch 50
        if (epoch + 1) == 50:
            optimizer.param_groups[0]['lr'] /= 2
        for i, (feature, label) in enumerate(train_loader):
            # the size of this batch (the original used len(train_loader),
            # i.e. the number of batches, which over-indexes the batch lists)
            cur_batch_size = len(feature)
            sampled_feature = torch.Tensor()
            sampled_label = torch.LongTensor()
            for j in range(cur_batch_size):
                t_sampled_feature, t_sampled_label, lengths = random_sample(
                    feature[j], label[j])
                sampled_feature = torch.cat(
                    (sampled_feature, t_sampled_feature), 1)
                sampled_label = torch.cat((sampled_label, t_sampled_label), 1)
            optimizer.zero_grad()
            sampled_feature = Variable(sampled_feature).to(device)
            sampled_label = Variable(sampled_label).to(device)
            lengths = Variable(lengths).to(device)
            output = model(sampled_feature)

            # average the loss over the sequences in the batch
            train_loss = 0
            for j in range(cur_batch_size):
                train_loss += criterion(output[:, j, :], sampled_label[:, j])
            train_loss /= cur_batch_size
            train_loss.backward()
            optimizer.step()
            epoch_train_loss += train_loss.item()

            # per-batch accuracy
            acc = 0.
            for j in range(cur_batch_size):
                t_output_label = torch.argmax(output[:, j, :], 1).cpu()
                acc += np.mean(
                    (t_output_label == sampled_label[:, j].cpu()).numpy())
            acc /= cur_batch_size
            train_acc += acc
            print('Epoch [%d/%d], Iter [%d/%d] loss %.4f, Acc %.4f, LR = %.6f'
                  % (epoch + 1, num_epochs, i + 1, len(train_loader),
                     train_loss.item(), acc, optimizer.param_groups[0]['lr']))
        if epoch % 10 == 0:
            save_checkpoint('./save/s2s-%03i.pth' % epoch, model, optimizer)

        # validation
        with torch.no_grad():
            model.eval()
            epoch_test_loss = 0.0
            test_acc = 0.
            for i, (feature, label) in enumerate(test_loader):
                sampled_feature, sampled_label, lengths = random_sample(
                    feature[0], label[0], valid=True)
                sampled_feature = Variable(sampled_feature).to(device)
                sampled_label = Variable(sampled_label).to(device)
                lengths = Variable(lengths).to(device)
                print("feature :", sampled_feature.shape)
                output = model(sampled_feature)
                # flatten the labels to match the (seq_len, num_classes) output
                sampled_label = sampled_label.view(-1)
                test_loss = criterion(output, sampled_label)
                epoch_test_loss += test_loss.item()

                # accuracy
                output_label = torch.argmax(output, 1).cpu()
                acc = np.mean((output_label == sampled_label.cpu()).numpy())
                print("Acc for %d : %.4f" % (lengths, acc))
                test_acc += acc

        print('\n============\nEpoch [%d/%d], Train: Loss: %.4f | Acc: %.4f, '
              'Validation: Loss: %.4f | Acc: %.4f' %
              (epoch + 1, num_epochs,
               epoch_train_loss / len(train_loader),
               train_acc / len(train_loader),
               epoch_test_loss / len(test_loader),
               test_acc / len(test_loader)))

        # record the per-epoch statistics
        train_loss_list.append(epoch_train_loss / len(train_loader))
        train_acc_list.append(train_acc / len(train_loader))
        test_loss_list.append(epoch_test_loss / len(test_loader))
        test_acc_list.append(test_acc / len(test_loader))

        # keep the checkpoint with the best validation accuracy
        if test_acc / len(test_loader) > best_accuracy:
            best_accuracy = test_acc / len(test_loader)
            save_checkpoint(
                './save/s2s-RNN043-%03i-%.6f.pth' % (epoch, best_accuracy),
                model, optimizer)
            print('Saved best model, test_acc = %.6f...' % best_accuracy)
        print('-' * 88)

        # dump the learning curves after every epoch
        with open('./logfile/s2s/train_loss.pkl', 'wb') as f:
            pickle.dump(train_loss_list, f)
        with open('./logfile/s2s/train_acc.pkl', 'wb') as f:
            pickle.dump(train_acc_list, f)
        with open('./logfile/s2s/test_loss.pkl', 'wb') as f:
            pickle.dump(test_loss_list, f)
        with open('./logfile/s2s/test_acc.pkl', 'wb') as f:
            pickle.dump(test_acc_list, f)
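# Entry point: the excerpt never shows main() being invoked, so a standard
# guard is added here (an assumption about how the script is run):
if __name__ == '__main__':
    main()

# The pickled logs can be plotted afterwards; a minimal sketch (matplotlib is
# an assumption, not imported by the original script):
#
#     import pickle
#     import matplotlib.pyplot as plt
#     train_loss = pickle.load(open('./logfile/s2s/train_loss.pkl', 'rb'))
#     test_loss = pickle.load(open('./logfile/s2s/test_loss.pkl', 'rb'))
#     plt.plot(train_loss, label='train'); plt.plot(test_loss, label='valid')
#     plt.legend(); plt.show()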
BATCH_SIZE = 32
TEST_EPOCHS = 1
SAVED_MODEL_DIR = 'saved_model_seq2seq'

# shuffle the data
idxQ, idxA = helper.shuffle_data(idxQ, idxA)

# split the data into train, test and validation sets
trainX, trainY, testX, testY, valX, valY = helper.split_data(
    idxQ, idxA, TRAIN_DATA_PERCENT, TEST_DATA_PERCENT, VAL_DATA_PERCENT)

# create the model object
model = seq2seq_model(vocabSizeEncoder=VOCAB_SIZE_ENCODER,
                      vocabSizeDecoder=VOCAB_SIZE_DECODER,
                      maxLenX=MAX_LEN_X,
                      maxLenY=MAX_LEN_Y,
                      embedDims=EMBED_DIMS,
                      numLayers=NUMBER_OF_LAYERS,
                      hiddenUnits=HIDDEN_UNITS,
                      lr=LEARNING_RATE)

# rebuild the tensorflow graph
model.build_model_graph()

# create a saver object to restore the trained weights
saver = tf.train.Saver()

# test batch generator object
batchGenTest = helper.get_next_batch(BATCH_SIZE, testX, testY)

# restoring
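# The snippet breaks off at the restore step. A plausible continuation (a
# sketch, not the original code), assuming checkpoints were written under
# SAVED_MODEL_DIR with tf.train.Saver.save and that helper.get_next_batch is
# a generator yielding (batchX, batchY) pairs:
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint(SAVED_MODEL_DIR))
    for _ in range(TEST_EPOCHS * (len(testX) // BATCH_SIZE)):
        batchX, batchY = next(batchGenTest)
        # feed (batchX, batchY) to the model's inference ops here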
def run_training(model_file):
    tf.reset_default_graph()
    input_data, targets, lr, target_sequence_length, max_target_sequence_length, source_sequence_length = get_model_inputs(batch_size)
    end_points = seq2seq_model(input_data, targets, lr, target_sequence_length,
                               max_target_sequence_length,
                               source_sequence_length, question_word_to_int,
                               answer_word_to_int, encoding_embedding_size,
                               decoding_embedding_size, rnn_size, num_layers,
                               batch_size, train=True)
    start_epoch = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, model_file)
        try:
            for epoch_i in range(start_epoch + 1, epochs + 1):
                for batch_i, (targets_batch, sources_batch, targets_lengths,
                              sources_lengths) in enumerate(
                                  get_batches(train_target, train_source,
                                              batch_size,
                                              question_word_to_int['<PAD>'],
                                              answer_word_to_int['<PAD>'])):
                    # Training step
                    _, loss = sess.run(
                        [end_points['train_op'], end_points['loss']],
                        feed_dict={
                            input_data: sources_batch,
                            targets: targets_batch,
                            lr: learning_rate,
                            target_sequence_length: targets_lengths,
                            source_sequence_length: sources_lengths
                        })
                    # Periodic status message with the validation loss
                    if batch_i % display_step == 0:
                        validation_loss = sess.run(
                            end_points['loss'], {
                                input_data: valid_sources_batch,
                                targets: valid_targets_batch,
                                lr: learning_rate,
                                target_sequence_length: valid_targets_lengths,
                                source_sequence_length: valid_sources_lengths
                            })
                        print('Epoch {:>3}/{} Batch {:>4}/{} - Loss: {:>6.3f} '
                              '- Validation loss: {:>6.3f}'.format(
                                  epoch_i, epochs, batch_i,
                                  len(train_source) // batch_size, loss,
                                  validation_loss))
                # Save the model after every epoch
                saver.save(sess, model_file)
                start_epoch += 1
        except KeyboardInterrupt:
            print('[INFO] Interrupted manually; saving a checkpoint now...')
            saver.save(sess, model_file)
            print('[INFO] The last epoch was saved; training will resume '
                  'from epoch {} next time.'.format(epoch_i + 1))
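# Example invocation (a sketch, not from the original source). Note that
# run_training restores from model_file before training, so the checkpoint
# must already exist; the path below is hypothetical:
if __name__ == '__main__':
    run_training('./checkpoints/seq2seq_model.ckpt')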
# Build the training graph
train_graph = tf.Graph()
with train_graph.as_default():
    # Get the model inputs
    input_data, targets, lr, target_sequence_length, max_target_sequence_length, source_sequence_length = get_inputs()
    training_decoder_output, predicting_decoder_output = sq.seq2seq_model(
        input_data, targets, lr, target_sequence_length,
        max_target_sequence_length, source_sequence_length,
        len(da.source_letter_to_int), len(da.target_letter_to_int),
        encoding_embedding_size, decoding_embedding_size,
        rnn_size, num_layers, batch_size)

    training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
    predicting_logits = tf.identity(predicting_decoder_output.sample_id,
                                    name='predictions')

    # Mask out the padded positions when computing the loss
    masks = tf.sequence_mask(target_sequence_length,
                             max_target_sequence_length,
                             dtype=tf.float32,
                             name='masks')

    with tf.name_scope("optimization"):
        # Loss function
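        # The snippet is cut off here. A plausible completion of the
        # optimization scope, following the standard TF 1.x pattern; the names
        # `cost` and `train_op` are assumptions, not from the original:
        cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets,
                                                masks)
        # Optimizer
        optimizer = tf.train.AdamOptimizer(lr)
        # Gradient clipping keeps each gradient value in [-5, 5]
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var)
                            for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)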