Example #1
def predict_sentence(input_sentence, model_file):
    text = source_to_seq(input_sentence)
    batch_size = 2

    tf.reset_default_graph()
    input_data, targets, lr, target_sequence_length, max_target_sequence_length, source_sequence_length = get_model_inputs(
        batch_size)
    end_points = seq2seq_model(input_data,
                               targets,
                               lr,
                               target_sequence_length,
                               max_target_sequence_length,
                               source_sequence_length,
                               question_word_to_int,
                               answer_word_to_int,
                               encoding_embedding_size,
                               decoding_embedding_size,
                               rnn_size,
                               num_layers,
                               batch_size,
                               train=False)
    with tf.Session() as sess:
        # Load saved model
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, model_file)
        # Multiply by batch_size to match the model's input parameters
        answer_logits = sess.run(
            end_points['inference_logits'], {
                input_data: [text] * batch_size,
                target_sequence_length: [len(text)] * batch_size,
                source_sequence_length: [len(text)] * batch_size
            })[0]

    pad = question_word_to_int["<PAD>"]

    print('Original Text:', input_sentence)

    print('\nSource')
    print('  Word Ids:    {}'.format([i for i in text]))
    print('  Input Words: {}'.format(" ".join(
        [question_int_to_word[i] for i in text])))

    print('\nTarget')
    print('  Word Ids:       {}'.format([i for i in answer_logits
                                         if i != pad]))
    print('  Response Words: {}'.format(" ".join(
        [answer_int_to_word[i] for i in answer_logits if i != pad])))
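
source_to_seq is referenced above but not shown. A minimal sketch of what such a helper usually looks like in this style of tutorial, assuming question_word_to_int contains '<UNK>' and '<PAD>' entries and that inputs are padded to a fixed length (both assumptions), followed by a hypothetical call:

def source_to_seq(text, sequence_length=20):
    # Hypothetical helper: map a raw sentence to a fixed-length list of word ids
    unk = question_word_to_int['<UNK>']
    pad = question_word_to_int['<PAD>']
    ids = [question_word_to_int.get(word, unk) for word in text.lower().split()]
    # Pad (or truncate) to the fixed length expected by the graph
    return (ids + [pad] * sequence_length)[:sequence_length]

# Example call; the checkpoint path is hypothetical
predict_sentence('how are you', './checkpoints/seq2seq.ckpt')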
Example #2
def prepare_session(vocabulary_size):
    logger.info('Preparing Tensorflow session')
    # Define a session
    tf.reset_default_graph()
    session = tf.InteractiveSession()

    # load the model inputs
    inputs, targets, lr, keep_prob = model_input()

    # setting the sequence length
    sequence_length = tf.placeholder_with_default(max_question_size,
                                                  None,
                                                  name='sequence_length')

    # getting the shape of the input tensor
    input_shape = tf.shape(inputs)

    logger.info('Initializing Seq2Seq model')
    # getting the training and test predictions
    training_predictions, test_predictions = seq2seq_model(
        tf.reverse(inputs,
                   [-1]), targets, keep_prob, batch_size, sequence_length,
        vocabulary_size, vocabulary_size, encoding_embedding_size,
        decoding_embedding_size, rnn_size, num_of_layers, sos_id, eos_id)

    # Setting the Loss Error, the Optimizer and Gradient Clipping
    with tf.name_scope("optimization"):
        loss_error = seq2seq.sequence_loss(
            training_predictions, targets,
            tf.ones([input_shape[0], sequence_length]))

        # Optimizer
        _optimizer = tf.train.AdamOptimizer(param_learning_rate)

        # Gradient clipping
        gradients = _optimizer.compute_gradients(loss_error)
        clipped_gradients = [
            (tf.clip_by_value(gradient_tensor, -5., 5.), gradient_variable)
            for gradient_tensor, gradient_variable in gradients
            if gradient_tensor is not None
        ]

        optimizer = _optimizer.apply_gradients(clipped_gradients)

    logger.info('Tensorflow session is ready')
    return session, optimizer, loss_error, inputs, targets, lr, keep_prob, sequence_length, test_predictions
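
The tensors returned by prepare_session are exactly what a training loop would feed. A minimal sketch of one gradient step, assuming padded_questions and padded_answers are already int-encoded, padded batches (hypothetical names) and reusing the hyperparameters referenced above:

session, optimizer, loss_error, inputs, targets, lr, keep_prob, sequence_length, test_predictions = prepare_session(vocabulary_size)

# One training step on a single (hypothetical) padded batch
_, batch_loss = session.run(
    [optimizer, loss_error],
    feed_dict={
        inputs: padded_questions,      # shape [batch_size, max_question_size]
        targets: padded_answers,       # shape [batch_size, max_question_size]
        lr: param_learning_rate,
        sequence_length: padded_answers.shape[1],
        keep_prob: 0.5,                # dropout keep probability (assumed value)
    })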
Example #3
import json
import sys
import os
import numpy as np

import re
from collections import Counter
from keras.utils import to_categorical
from keras.callbacks import Callback
from config import *
from seq2seq_model import seq2seq_model

model, encoder_model, decoder_model, inf_model = seq2seq_model()

train_id_list = open('hw2_1_data/training_data/id.txt').read().split()
train_data = {
    i: np.load('hw2_1_data/training_data/feat/' + i + '.npy')
    for i in train_id_list
}
train_label = json.loads(open('hw2_1_data/training_label.json', 'r').read())
test_id_list = open('hw2_1_data/testing_data/id.txt').read().split()
test_data = {
    i: np.load('hw2_1_data/testing_data/feat/' + i + '.npy')
    for i in test_id_list
}
test_label = json.loads(open('hw2_1_data/testing_label.json', 'r').read())

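The Counter import above suggests the next cell builds a caption vocabulary. A minimal sketch of that step, assuming each entry of training_label.json is a dict with 'id' and 'caption' keys (an assumption about the data layout, since the file itself is not shown):

# Count word frequencies over all training captions (hypothetical continuation)
word_counts = Counter()
for entry in train_label:
    for caption in entry['caption']:
        word_counts.update(re.findall(r"[a-z']+", caption.lower()))

# Keep words above a minimum frequency and reserve ids for special tokens
vocab = ['<PAD>', '<BOS>', '<EOS>', '<UNK>'] + \
        [w for w, c in word_counts.most_common() if c >= 3]
word_to_id = {w: i for i, w in enumerate(vocab)}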
Example #4
def main():
    # parameters
    feature_size = 512 * 7 * 7
    learning_rate = 0.0001
    num_epochs = 100
    batch_size = 6

    # create the save log file
    print("Create the directory")
    if not os.path.exists("./save"):
        os.makedirs("./save")
    if not os.path.exists("./logfile"):
        os.makedirs("./logfile")
    if not os.path.exists("./logfile/s2s"):
        os.makedirs("./logfile/s2s")

    # load my Dataset
    train_dataset = seq2seq_Dataset.seq2seq_Dataset(mode="train")
    test_dataset = seq2seq_Dataset.seq2seq_Dataset(mode="valid")

    print('the train_dataset has %d size.' % (len(train_dataset.data)))
    print('the valid_dataset has %d size.' % (len(test_dataset.data)))

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8,
                              collate_fn=my_collate)
    test_loader = DataLoader(test_dataset,
                             batch_size=1,
                             shuffle=True,
                             num_workers=8,
                             collate_fn=my_collate)

    print('the train_loader has %d size.' % (len(train_loader)))
    print('the test_loader has %d size.' % (len(test_loader)))

    model = seq2seq_model.seq2seq_model(feature_size)

    # GPU enable
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Device used:', device)
    if torch.cuda.is_available():
        model = model.to(device)

    # setup optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           betas=(0.5, 0.999))
    load_checkpoint('./save/RNN-043.pth', model, optimizer)

    criterion = nn.CrossEntropyLoss()

    train_loss_list = []
    train_acc_list = []
    test_loss_list = []
    test_acc_list = []

    print("Starting training...")
    best_accuracy = -np.inf
    for epoch in range(num_epochs):
        model.train()
        print("Epoch:", epoch + 1)
        epoch_train_loss = 0.0
        train_acc = 0.0

        if (epoch + 1) == 50:
            optimizer.param_groups[0]['lr'] /= 2

        for i, (feature, label) in enumerate(train_loader):
            batch_size = len(feature)  # number of samples in this batch, not the number of batches
            sampled_feature = torch.Tensor()
            sampled_label = torch.LongTensor()
            for j in range(batch_size):  # use j so the outer batch index i is not clobbered
                t_feature = feature[j]
                t_label = label[j]
                t_sampled_feature, t_sampled_label, lengths = random_sample(
                    t_feature, t_label)
                sampled_feature = torch.cat(
                    (sampled_feature, t_sampled_feature), 1)
                sampled_label = torch.cat((sampled_label, t_sampled_label), 1)

            optimizer.zero_grad()
            sampled_feature = Variable(sampled_feature).to(device)
            sampled_label = Variable(sampled_label).to(device)
            lengths = Variable(lengths).to(device)

            # print("feature :",sampled_feature.shape)
            output = model(sampled_feature)
            # print("feature :",sampled_label.shape)

            # print("output :",output.shape)
            train_loss = 0
            for j in range(batch_size):
                t_output = output[:, j, :]
                t_sampled_label = sampled_label[:, j]
                loss = criterion(t_output, t_sampled_label)
                train_loss += loss
            train_loss /= batch_size
            train_loss.backward()
            optimizer.step()

            epoch_train_loss += train_loss.item()

            # Accuracy
            acc = 0.
            for j in range(batch_size):
                t_output = output[:, j, :]
                t_sampled_label = sampled_label[:, j]
                t_output_label = torch.argmax(t_output, 1).cpu()
                t_acc = np.mean(
                    (t_output_label == t_sampled_label.cpu()).numpy())
                acc += t_acc
            acc /= batch_size
            train_acc += acc
            print('Epoch [%d/%d], Iter [%d/%d] Loss %.4f, Acc %.4f, LR = %.6f' %
                  (epoch, num_epochs, i + 1, len(train_loader),
                   train_loss.item(), acc, optimizer.param_groups[0]['lr']))

        if (epoch) % 10 == 0:
            save_checkpoint('./save/s2s-%03i.pth' % (epoch), model, optimizer)

        # testing
        with torch.no_grad():

            model.eval()
            epoch_test_loss = 0.0
            test_acc = 0.
            for i, (feature, label) in enumerate(test_loader):
                sampled_feature, sampled_label, lengths = random_sample(
                    feature[0], label[0], valid=True)

                sampled_feature = Variable(sampled_feature).to(device)
                sampled_label = Variable(sampled_label).to(device)
                lengths = Variable(lengths).to(device)
                print("feature :", sampled_feature.shape)
                output = model(sampled_feature)
                sampled_label = sampled_label.view(-1)
                # print("output :",output.shape)
                # print("sampled_label :",sampled_label.shape)
                test_loss = criterion(output, sampled_label)
                epoch_test_loss += test_loss.item()
                # Accuracy
                output_label = torch.argmax(output, 1).cpu()
                #if i==0:
                #print(lengths)
                #print(output_label)
                acc = np.mean((output_label == sampled_label.cpu()).numpy())
                print("Acc for %d : %.4f" % (lengths, acc))
                test_acc += acc
                """
                sampled_feature,sampled_label,lengths = random_sample(feature,label,valid = True)
                optimizer.zero_grad()
                sampled_feature = Variable(sampled_feature).to(device)
                sampled_label = Variable(sampled_label).to(device)
                lengths = Variable(lengths).to(device)
                # print("feature :",sampled_feature.shape)
                output = model(sampled_feature)
                test_loss = criterion(output, sampled_label)
                epoch_test_loss += test_loss.item()
                # Accuracy
                output_label = torch.argmax(output,1).cpu()
                acc = np.mean((output_label == sampled_label.cpu()).numpy())
                test_acc += acc
                """
        print(
            '\n============\nEpoch [%d/%d] ,Train: Loss: %.4f | Acc: %.4f ,Validation: loss: %.4f | Acc: %.4f'
            % (epoch, num_epochs, epoch_train_loss / len(train_loader),
               train_acc / len(train_loader), epoch_test_loss /
               len(test_loader), test_acc / len(test_loader)))

        # save loss data
        train_loss_list.append(epoch_train_loss / len(train_loader))
        train_acc_list.append(train_acc / len(train_loader))
        test_loss_list.append(epoch_test_loss / len(test_loader))
        test_acc_list.append(test_acc / len(test_loader))

        if (test_acc / len(test_loader) > best_accuracy):
            best_accuracy = test_acc / len(test_loader)
            save_checkpoint(
                './save/s2s-RNN043-%03i-%.6f.pth' % (epoch, best_accuracy),
                model, optimizer)
            print('Save best model , test_acc = %.6f...' % (best_accuracy))

        print('-' * 88)

    with open('./logfile/s2s/train_loss.pkl', 'wb') as f:
        pickle.dump(train_loss_list, f)
    with open('./logfile/s2s/train_acc.pkl', 'wb') as f:
        pickle.dump(train_acc_list, f)
    with open('./logfile/s2s/test_loss.pkl', 'wb') as f:
        pickle.dump(test_loss_list, f)
    with open('./logfile/s2s/test_acc.pkl', 'wb') as f:
        pickle.dump(test_acc_list, f)
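
save_checkpoint and load_checkpoint are used above but not defined in this excerpt. A minimal sketch of what they commonly look like in PyTorch, under the assumption that a checkpoint only needs the model and optimizer state dicts:

def save_checkpoint(path, model, optimizer):
    # Persist model and optimizer state so training can resume later
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}, path)

def load_checkpoint(path, model, optimizer):
    # Restore a previously saved checkpoint (map_location keeps this CPU-safe)
    state = torch.load(path, map_location='cpu')
    model.load_state_dict(state['model'])
    optimizer.load_state_dict(state['optimizer'])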
Example #5
BATCH_SIZE = 32
TEST_EPOCHS = 1
SAVED_MODEL_DIR = 'saved_model_seq2seq'
# shuffling data
idxQ, idxA = helper.shuffle_data(idxQ, idxA)

# splitting data into train, test, validation
trainX, trainY, testX, testY, valX, valY = helper.split_data(
    idxQ, idxA, TRAIN_DATA_PERCENT, TEST_DATA_PERCENT, VAL_DATA_PERCENT)

# creating model class object
model = seq2seq_model(vocabSizeEncoder=VOCAB_SIZE_ENCODER,
                      vocabSizeDecoder=VOCAB_SIZE_DECODER,
                      maxLenX=MAX_LEN_X,
                      maxLenY=MAX_LEN_Y,
                      embedDims=EMBED_DIMS,
                      numLayers=NUMBER_OF_LAYERS,
                      hiddenUnits=HIDDEN_UNITS,
                      lr=LEARNING_RATE)

# re- building tensorflow graph

model.build_model_graph()

# creating saver object to restore
saver = tf.train.Saver()

# test batch generator object
batchGenTest = helper.get_next_batch(BATCH_SIZE, testX, testY)

# restoring
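The example is cut off after the '# restoring' comment. A minimal sketch of how the restore and test steps typically proceed, assuming the latest checkpoint lives in SAVED_MODEL_DIR, that helper.get_next_batch returns a generator, and that the model object exposes its input placeholder and prediction op (hypothetical attribute names):

with tf.Session() as session:
    # Restore the most recent checkpoint from the saved-model directory
    saver.restore(session, tf.train.latest_checkpoint(SAVED_MODEL_DIR))

    for _ in range(TEST_EPOCHS):
        batchX, batchY = next(batchGenTest)
        # model.inputs / model.predictions are hypothetical attribute names
        predictions = session.run(model.predictions,
                                  feed_dict={model.inputs: batchX})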
Example #6
def run_training(model_file):
    tf.reset_default_graph()

    input_data, targets, lr, target_sequence_length, max_target_sequence_length, source_sequence_length = get_model_inputs(
        batch_size)
    end_points = seq2seq_model(input_data,
                               targets,
                               lr,
                               target_sequence_length,
                               max_target_sequence_length,
                               source_sequence_length,
                               question_word_to_int,
                               answer_word_to_int,
                               encoding_embedding_size,
                               decoding_embedding_size,
                               rnn_size,
                               num_layers,
                               batch_size,
                               train=True)

    start_epoch = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, model_file)
        try:
            for epoch_i in range(start_epoch + 1, epochs + 1):
                for batch_i, (targets_batch, sources_batch, targets_lengths,
                              sources_lengths) in enumerate(
                                  get_batches(train_target, train_source,
                                              batch_size,
                                              question_word_to_int['<PAD>'],
                                              answer_word_to_int['<PAD>'])):

                    # Training step
                    _, loss = sess.run(
                        [end_points['train_op'], end_points['loss']],
                        feed_dict={
                            input_data: sources_batch,
                            targets: targets_batch,
                            lr: learning_rate,
                            target_sequence_length: targets_lengths,
                            source_sequence_length: sources_lengths
                        })

                    # Debug message updating us on the status of the training
                    if batch_i % display_step == 0:
                        # Calculate validation cost
                        validation_loss = sess.run(
                            end_points['loss'], {
                                input_data: valid_sources_batch,
                                targets: valid_targets_batch,
                                lr: learning_rate,
                                target_sequence_length: valid_targets_lengths,
                                source_sequence_length: valid_sources_lengths
                            })

                        print(
                            'Epoch {:>3}/{} Batch {:>4}/{} - Loss: {:>6.3f}  - Validation loss: {:>6.3f}'
                            .format(epoch_i, epochs, batch_i,
                                    len(train_source) // batch_size, loss,
                                    validation_loss))
                # Save the model at the end of each epoch
                saver.save(sess, model_file)
                start_epoch += 1
        except KeyboardInterrupt:
            print(
                '[INFO] Interrupted manually, saving a checkpoint now...')
            saver.save(sess, model_file)
            print(
                '[INFO] The last completed epoch was saved; the next run will start from epoch {}.'
                .format(epoch_i + 1))
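
get_batches is called above but not included in this excerpt. A minimal sketch consistent with its call signature and the values the training loop unpacks, assuming simple right-padding with the <PAD> id:

def pad_sentence_batch(sentence_batch, pad_int):
    # Right-pad every sentence in the batch to the length of the longest one
    max_len = max(len(sentence) for sentence in sentence_batch)
    return [sentence + [pad_int] * (max_len - len(sentence))
            for sentence in sentence_batch]

def get_batches(targets, sources, batch_size, source_pad_int, target_pad_int):
    # Yield (padded targets, padded sources, target lengths, source lengths)
    for start in range(0, len(sources) // batch_size * batch_size, batch_size):
        sources_batch = sources[start:start + batch_size]
        targets_batch = targets[start:start + batch_size]
        yield (pad_sentence_batch(targets_batch, target_pad_int),
               pad_sentence_batch(sources_batch, source_pad_int),
               [len(t) for t in targets_batch],
               [len(s) for s in sources_batch])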
Example #7
# Build the training graph
train_graph = tf.Graph()

with train_graph.as_default():
	
	# Get the model inputs
	input_data, targets, lr, target_sequence_length, max_target_sequence_length, source_sequence_length = get_inputs()
	
	training_decoder_output, predicting_decoder_output = sq.seq2seq_model(input_data, 
																	  targets, 
																	  lr, 
																	  target_sequence_length, 
																	  max_target_sequence_length, 
																	  source_sequence_length,
																	  len(da.source_letter_to_int),
																	  len(da.target_letter_to_int),
																	  encoding_embedding_size, 
																	  decoding_embedding_size, 
																	  rnn_size, 
																	  num_layers,
																	  batch_size)	
	
	training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
	predicting_logits = tf.identity(predicting_decoder_output.sample_id, name='predictions')
	
	masks = tf.sequence_mask(target_sequence_length, max_target_sequence_length, dtype=tf.float32, name='masks')

	with tf.name_scope("optimization"):
		
		# Loss function