Example #1
0
def runTest(n_layers, hidden_size, reverse, modelFile, beam_size, inp, corpus):
    """Restore a trained encoder/decoder pair and evaluate it without gradients.

    A truthy ``inp`` hands the models to ``evaluateInput``; otherwise
    ``evaluateRandomly`` is run on the loaded pairs with a count of 20.
    """
    # Inference only: turn autograd off (this is a global torch switch).
    torch.set_grad_enabled(False)

    voc, pairs = loadPrepareData(corpus)

    # Encoder and decoder are both built around the same embedding instance.
    shared_embedding = nn.Embedding(voc.num_words, hidden_size)
    encoder = EncoderRNN(hidden_size, shared_embedding, n_layers)
    decoder = LuongAttnDecoderRNN('dot', shared_embedding, hidden_size,
                                  voc.num_words, n_layers)

    # The identity map_location keeps every stored tensor on the CPU.
    saved = torch.load(modelFile, map_location=lambda storage, loc: storage)
    for module, key in ((encoder, 'en'), (decoder, 'de')):
        module.load_state_dict(saved[key])

    # Evaluation mode only changes layers such as dropout and batch norm.
    for module in (encoder, decoder):
        module.eval()

    encoder, decoder = encoder.to(device), decoder.to(device)

    if not inp:
        evaluateRandomly(encoder, decoder, voc, pairs, reverse, beam_size, 20)
        return
    evaluateInput(encoder, decoder, voc, beam_size)
Example #2
0
def runTest(args, n_layers, hidden_size, reverse, modelFile, beam_size,
            batch_size, input, corpus):
    """Load a trained two-encoder/decoder model and evaluate it on test data.

    Args:
        args: Options object; passed to ``loadPrepareData`` and read for
            ``args.sample``.
        n_layers: Layer count handed to both encoders and the decoder.
        hidden_size: Embedding width and hidden size of the networks.
        reverse: Forwarded unchanged to the evaluation helpers.
        modelFile: Checkpoint path holding the ``'enU'``, ``'enB'`` and
            ``'de'`` state dicts.
        beam_size: Forwarded to ``evaluateRandomly``.
        batch_size: Batch size used to batchify the test split.
        input: Unused here; kept for interface compatibility.
        corpus: Unused here; kept for interface compatibility.
    """
    data, length = loadPrepareData(args)
    voc = data.voc
    print('load data...')
    user_length, item_length = length
    test_batches = batchify(data.test, data.user_text, user_length,
                            data.item_text, item_length, batch_size)

    print('Building encoder and decoder ...')

    # One embedding instance is shared by both encoders and the decoder.
    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoderU = EncoderRNNlinear(voc.n_words, hidden_size, embedding,
                                data.dmax, n_layers)
    encoderB = EncoderRNNlinear(voc.n_words, hidden_size, embedding,
                                data.dmax, n_layers)

    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.n_words, n_layers)

    # Deserialize onto the CPU so a checkpoint written on a GPU machine can
    # still be loaded on a CPU-only host; the modules are moved to the GPU
    # below when USE_CUDA is set.
    checkpoint = torch.load(modelFile,
                            map_location=lambda storage, loc: storage)
    encoderU.load_state_dict(checkpoint['enU'])
    encoderB.load_state_dict(checkpoint['enB'])
    decoder.load_state_dict(checkpoint['de'])

    # Evaluation mode; only affects layers such as dropout and batch norm.
    encoderU.train(False)
    encoderB.train(False)
    decoder.train(False)

    if USE_CUDA:
        encoderU = encoderU.cuda()
        encoderB = encoderB.cuda()
        decoder = decoder.cuda()

    if args.sample:
        # Evaluate using sampling.
        sample(encoderU, encoderB, decoder, voc, test_batches, reverse)
        return

    # Evaluate on the test split. Only the first two batches are processed.
    for test_i, test_batch in enumerate(test_batches):
        if test_i > 1:
            break
        (input_index, input_variable, lengths, target_variable, mask,
         max_target_len) = test_batch
        user_input_variable, business_input_variable = input_variable
        user_lengths, business_lengths = lengths

        evaluateRandomly(encoderU, encoderB, decoder, voc,
                         input_index, user_input_variable,
                         business_input_variable,
                         user_lengths, business_lengths,
                         target_variable, mask, max_target_len, reverse,
                         beam_size)
Example #3
0
def trainIters(attn_model, hidden_size,encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size, \
               learning_rate, decoder_learning_ratio, print_every, save_every, clip, dropout, \
               corpus_name, datafile, modelFile=None, need_trim=True):
    """Train the encoder/decoder for ``n_iteration`` steps, optionally resuming.

    Data comes from ``datafile`` (rare words trimmed when ``need_trim``).
    When ``modelFile`` is given, model, embedding and optimizer states are
    restored from it and training continues after the stored iteration.
    The running average loss is printed every ``print_every`` iterations and
    a checkpoint is written under ``save_dir`` every ``save_every`` iterations.
    """
    voc, pairs = loadPrepareData(datafile)
    if need_trim:
        # Drop rare words from the vocabulary and the pairs that use them.
        pairs = trimRareWords(voc, pairs, MIN_COUNT)

    # One randomly sampled batch per iteration, all prepared up front.
    training_batches = []
    for _ in range(n_iteration):
        sampled_pairs = [random.choice(pairs) for _ in range(batch_size)]
        training_batches.append(batch2TrainData(voc, sampled_pairs))

    resuming = bool(modelFile)
    if resuming:
        # Plain load: assumes the checkpoint's devices exist on this machine.
        checkpoint = torch.load(modelFile)
        (encoder_sd, decoder_sd, encoder_optimizer_sd, decoder_optimizer_sd,
         embedding_sd) = [
             checkpoint[key]
             for key in ('en', 'de', 'en_opt', 'de_opt', 'embedding')
         ]

    embedding = nn.Embedding(voc.num_words, hidden_size)
    if resuming:
        embedding.load_state_dict(embedding_sd)

    # Build the models around the shared embedding.
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    if resuming:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)

    print('Building optimizers ...')
    decoder_lr = learning_rate * decoder_learning_ratio
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=decoder_lr)
    if resuming:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    print('Initializing ...')
    start_iteration = checkpoint['iteration'] + 1 if resuming else 1
    print_loss = 0

    print("Training...")
    encoder.train()
    decoder.train()

    progress_template = "Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}"
    for iteration in range(start_iteration, n_iteration + 1):
        # Batches are stored 0-based while iterations count from 1.
        (input_variable, lengths, target_variable, mask,
         max_target_len) = training_batches[iteration - 1]

        loss = train(input_variable, lengths, target_variable, mask,
                     max_target_len, encoder, decoder, encoder_optimizer,
                     decoder_optimizer, batch_size, clip)
        print_loss += loss

        # Progress report
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            percent_done = iteration / n_iteration * 100
            print(progress_template.format(iteration, percent_done,
                                           print_loss_avg))
            print_loss = 0

        # Checkpointing
        if iteration % save_every != 0:
            continue
        run_tag = '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers,
                                    hidden_size)
        directory = os.path.join(save_dir, "model", run_tag)
        if not os.path.exists(directory):
            os.makedirs(directory)
        snapshot = {
            'iteration': iteration,
            'en': encoder.state_dict(),
            'de': decoder.state_dict(),
            'en_opt': encoder_optimizer.state_dict(),
            'de_opt': decoder_optimizer.state_dict(),
            'loss': loss,
            'embedding': embedding.state_dict()
        }
        snapshot_name = '{}_{}.tar'.format(iteration, 'checkpoint')
        torch.save(snapshot, os.path.join(directory, snapshot_name))
def train(**kwargs):
    """Train the encoder/decoder pair, optionally resuming from a checkpoint.

    Every keyword argument overrides the attribute of the same name on a
    fresh ``Config`` instance. When ``opt.model_ckpt`` is set, model and
    optimizer states are restored from it before training. A checkpoint is
    written whenever ``epoch % opt.save_every == 0``.

    Args:
        **kwargs: Configuration overrides applied to ``Config()``.
    """
    opt = Config()
    for k, v in kwargs.items():  # apply configuration overrides
        setattr(opt, k, v)

    # Data
    dataloader = get_dataloader(opt)
    _data = dataloader.dataset._data
    word2ix = _data['word2ix']
    # presumably the index of the start-of-sequence token - TODO confirm
    sos = word2ix.get(_data.get('sos'))
    voc_length = len(word2ix)

    # Models
    encoder = EncoderRNN(opt, voc_length)
    decoder = LuongAttnDecoderRNN(opt, voc_length)

    # Resume from where the previous run stopped.
    if opt.model_ckpt:
        checkpoint = torch.load(opt.model_ckpt)
        encoder.load_state_dict(checkpoint['en'])
        decoder.load_state_dict(checkpoint['de'])

    # Move to the target device and switch to training mode.
    encoder = encoder.to(opt.device)
    decoder = decoder.to(opt.device)
    encoder.train()
    decoder.train()

    # Optimizers must be created after the models were moved to the device;
    # do not swap the order.
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=opt.learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=opt.learning_rate *
                                         opt.decoder_learning_ratio)
    if opt.model_ckpt:
        encoder_optimizer.load_state_dict(checkpoint['en_opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])

    # Running loss and the number of batches it covers. The report fires at
    # ii == 0 and the accumulator carries over between epochs, so the window
    # rarely holds exactly ``print_every`` batches; dividing by the real
    # count keeps the reported average correct.
    print_loss = 0
    batches_since_print = 0

    for epoch in range(opt.epoch):
        for ii, data in enumerate(dataloader):
            # Train on one batch.
            loss = train_by_batch(sos, opt, data, encoder_optimizer,
                                  decoder_optimizer, encoder, decoder)
            print_loss += loss
            batches_since_print += 1
            # Report the loss.
            if ii % opt.print_every == 0:
                print_loss_avg = print_loss / batches_since_print
                print(
                    "Epoch: {}; Epoch Percent complete: {:.1f}%; Average loss: {:.4f}"
                    .format(epoch, epoch / opt.epoch * 100, print_loss_avg))
                print_loss = 0
                batches_since_print = 0

        # Save a checkpoint; the file name is the prefix plus a
        # month/day_hour/minute timestamp.
        if epoch % opt.save_every == 0:
            checkpoint_path = '{prefix}_{time}'.format(
                prefix=opt.prefix, time=time.strftime('%m%d_%H%M'))
            torch.save(
                {
                    'en': encoder.state_dict(),
                    'de': decoder.state_dict(),
                    'en_opt': encoder_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                }, checkpoint_path)
Example #5
0
decoder = decoder.to(device)
print('Models built and ready to go!')

# step8: do train
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 4000
print_every = 1
save_every = 500

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers; the decoder's learning rate is the base rate
# scaled by decoder_learning_ratio
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(),
                               lr=learning_rate * decoder_learning_ratio)
# Restore optimizer state when resuming from a checkpoint file
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Run training iterations
print("Starting Training!")
trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer,
           decoder_optimizer, embedding, encoder_n_layers, decoder_n_layers,
           save_dir, n_iteration, batch_size, print_every, save_every, clip,
Example #6
0
def runTest(n_layers, hidden_size, reverse, modelFile, beam_size, input,
            corpus):
    """Load the three-encoder/decoder model and evaluate it on all test pairs.

    Args:
        n_layers: Layer count handed to every encoder and the decoder.
        hidden_size: Embedding width and hidden size of the networks.
        reverse: Forwarded unchanged to ``evaluateRandomly``.
        modelFile: Checkpoint path holding the ``'en1'``, ``'en2'``,
            ``'en3'`` and ``'de'`` state dicts.
        beam_size: Forwarded to ``evaluateRandomly``.
        input: Unused here; kept for interface compatibility.
        corpus: Corpus identifier passed to ``loadPrepareData``.
    """
    voc, pairs, valid_pairs, test_pairs = loadPrepareData(corpus)

    print('Building encoder and decoder ...')
    # Aspect word ids.
    # NOTE(review): pickle.load can execute arbitrary code; these files must
    # come from a trusted source.
    with open(os.path.join(save_dir, '15_aspect.pkl'), 'rb') as fp:
        aspect_ids = pickle.load(fp)
    aspect_num = 15  # 15 | 20 main aspects and each of them has 100 words
    # Convert the list into a tensor used to index the word embedding.
    aspect_ids = Variable(torch.LongTensor(aspect_ids), requires_grad=False)

    # Attribute embeddings: one table per attribute (user, item).
    attr_size = 64
    attr_num = 2

    with open(os.path.join(save_dir, 'user_item.pkl'), 'rb') as fp:
        user_dict, item_dict = pickle.load(fp)
    num_user = len(user_dict)
    num_item = len(item_dict)
    attr_embeddings = [
        nn.Embedding(num_user, attr_size),
        nn.Embedding(num_item, attr_size),
    ]
    aspect_embeddings = [
        nn.Embedding(num_user, aspect_num),
        nn.Embedding(num_item, aspect_num),
    ]
    if USE_CUDA:
        # Module.cuda() moves the module's parameters in place, so there is
        # no need to rebind the list entries.
        for table in attr_embeddings + aspect_embeddings:
            table.cuda()
        aspect_ids = aspect_ids.cuda()

    encoder1 = AttributeEncoder(attr_size, attr_num, hidden_size,
                                attr_embeddings, n_layers)
    encoder2 = AttributeEncoder(aspect_num, attr_num, hidden_size,
                                aspect_embeddings, n_layers)
    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoder3 = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)

    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  attr_size, voc.n_words, aspect_ids, n_layers)

    # Deserialize onto the CPU so a checkpoint written on a GPU machine can
    # still be loaded on a CPU-only host; load_state_dict copies the values
    # into the parameters wherever they live.
    checkpoint = torch.load(modelFile,
                            map_location=lambda storage, loc: storage)
    encoder1.load_state_dict(checkpoint['en1'])
    encoder2.load_state_dict(checkpoint['en2'])
    encoder3.load_state_dict(checkpoint['en3'])
    decoder.load_state_dict(checkpoint['de'])

    if USE_CUDA:
        encoder1 = encoder1.cuda()
        encoder2 = encoder2.cuda()
        encoder3 = encoder3.cuda()
        decoder = decoder.cuda()

    # Evaluation mode; only affects layers such as dropout and batch norm.
    encoder1.train(False)
    encoder2.train(False)
    encoder3.train(False)
    decoder.train(False)

    # Evaluate on the whole test split.
    evaluateRandomly(encoder1, encoder2, encoder3, decoder, voc, test_pairs,
                     reverse, beam_size, len(test_pairs))
Example #7
0
def _load_or_build_batches(cache_file, build_batches, label):
    """Return the batches cached at ``cache_file``, building them on a miss.

    On a cache miss the batches are produced by calling ``build_batches()``
    and saved to ``cache_file`` (creating its directory if needed).
    ``label`` names the split in the progress messages.
    """
    try:
        return torch.load(cache_file)
    except FileNotFoundError:
        print('{} pairs not found, generating ...'.format(label.capitalize()))
        batches = build_batches()
        print('Complete building {} pairs ...'.format(label))
        cache_dir = os.path.dirname(cache_file)
        if cache_dir:
            # Without this the save below fails on a fresh checkout.
            os.makedirs(cache_dir, exist_ok=True)
        torch.save(batches, cache_file)
        return batches


def trainIters(corpus,
               reverse,
               n_epoch,
               learning_rate,
               batch_size,
               n_layers,
               hidden_size,
               print_every,
               loadFilename=None,
               attn_model='dot',
               decoder_learning_ratio=1.0):
    """Train the three-encoder/decoder model with per-epoch validation.

    After every epoch the validation loss is computed. When it improves on
    the best seen so far, a checkpoint is written and the test loss is
    reported; when it is worse, training stops.

    Args:
        corpus: Corpus identifier passed to ``loadPrepareData``.
        reverse: Forwarded to ``batchify`` and ``filename``.
        n_epoch: Exclusive upper bound of the epoch counter.
        learning_rate: Base learning rate for all Adam optimizers.
        batch_size: Training batch size.
        n_layers: Layer count of the encoders and the decoder.
        hidden_size: Embedding width and hidden size of the networks.
        print_every: Interval (in batches) of the progress report.
        loadFilename: Optional checkpoint to resume from.
        attn_model: Attention variant handed to the decoder.
        decoder_learning_ratio: Multiplier on the decoder's learning rate.
    """
    print(
        "corpus: {}, reverse={}, n_epoch={}, learning_rate={}, batch_size={}, n_layers={}, hidden_size={}, decoder_learning_ratio={}"
        .format(corpus, reverse, n_epoch, learning_rate, batch_size, n_layers,
                hidden_size, decoder_learning_ratio))

    voc, pairs, valid_pairs, test_pairs = loadPrepareData(corpus)
    print('load data...')

    path = "data/expansion"

    def cache_file(kind, size):
        # Location of the cached batches for one split and batch size.
        return os.path.join(save_dir, path,
                            '{}_{}.tar'.format(filename(reverse, kind), size))

    # training data
    training_batches = _load_or_build_batches(
        cache_file('training_batches', batch_size),
        lambda: batchify(pairs, batch_size, voc, reverse),
        'training')

    # validation/test data
    eval_batch_size = 10
    val_batches = _load_or_build_batches(
        cache_file('val_batches', eval_batch_size),
        lambda: batchify(valid_pairs, eval_batch_size, voc, reverse,
                         evaluation=True),
        'validation')
    test_batches = _load_or_build_batches(
        cache_file('test_batches', eval_batch_size),
        lambda: batchify(test_pairs, eval_batch_size, voc, reverse,
                         evaluation=True),
        'test')

    # model
    checkpoint = None
    print('Building encoder and decoder ...')
    # Aspect word ids.
    # NOTE(review): pickle.load can execute arbitrary code; these files must
    # come from a trusted source.
    with open(os.path.join(save_dir, '15_aspect.pkl'), 'rb') as fp:
        aspect_ids = pickle.load(fp)
    aspect_num = 15  # 15 | 20 main aspects and each of them has 100 words
    # Convert the list into a tensor used to index the word embedding.
    aspect_ids = Variable(torch.LongTensor(aspect_ids), requires_grad=False)
    # Attribute embeddings: one table per attribute (user, item).
    attr_size = 64
    attr_num = 2

    print(
        "corpus: {}, reverse={}, n_words={}, n_epoch={}, learning_rate={}, batch_size={}, n_layers={}, hidden_size={}, decoder_learning_ratio={}, attr_size={}, aspect_num={}"
        .format(corpus, reverse, voc.n_words, n_epoch, learning_rate,
                batch_size, n_layers, hidden_size, decoder_learning_ratio,
                attr_size, aspect_num))
    with open(os.path.join(save_dir, 'user_item.pkl'), 'rb') as fp:
        user_dict, item_dict = pickle.load(fp)
    num_user = len(user_dict)
    num_item = len(item_dict)
    attr_embeddings = [
        nn.Embedding(num_user, attr_size),
        nn.Embedding(num_item, attr_size),
    ]
    aspect_embeddings = [
        nn.Embedding(num_user, aspect_num),
        nn.Embedding(num_item, aspect_num),
    ]
    if USE_CUDA:
        # Module.cuda() moves the module's parameters in place, so there is
        # no need to rebind the list entries.
        for table in attr_embeddings + aspect_embeddings:
            table.cuda()
        aspect_ids = aspect_ids.cuda()

    encoder1 = AttributeEncoder(attr_size, attr_num, hidden_size,
                                attr_embeddings, n_layers)
    encoder2 = AttributeEncoder(aspect_num, attr_num, hidden_size,
                                aspect_embeddings, n_layers)
    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoder3 = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)
    # Honour the ``attn_model`` argument (it used to be overwritten with
    # 'dot', which is still the default).
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  attr_size, voc.n_words, aspect_ids, n_layers)
    if loadFilename:
        checkpoint = torch.load(loadFilename)
        encoder1.load_state_dict(checkpoint['en1'])
        encoder2.load_state_dict(checkpoint['en2'])
        encoder3.load_state_dict(checkpoint['en3'])
        decoder.load_state_dict(checkpoint['de'])
    # use cuda
    if USE_CUDA:
        encoder1 = encoder1.cuda()
        encoder2 = encoder2.cuda()
        encoder3 = encoder3.cuda()
        decoder = decoder.cuda()

    # optimizer
    print('Building optimizers ...')
    encoder1_optimizer = optim.Adam(encoder1.parameters(), lr=learning_rate)
    encoder2_optimizer = optim.Adam(encoder2.parameters(), lr=learning_rate)
    encoder3_optimizer = optim.Adam(encoder3.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder1_optimizer.load_state_dict(checkpoint['en1_opt'])
        encoder2_optimizer.load_state_dict(checkpoint['en2_opt'])
        encoder3_optimizer.load_state_dict(checkpoint['en3_opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])

    # initialize
    print('Initializing ...')
    start_epoch = 0
    perplexity = []  # per-batch training losses, stored under 'plt'
    best_val_loss = None
    if loadFilename:
        start_epoch = checkpoint['epoch'] + 1
        perplexity = checkpoint['plt']

    for epoch in range(start_epoch, n_epoch):
        epoch_start_time = time.time()
        # train epoch
        encoder1.train()
        encoder2.train()
        encoder3.train()
        decoder.train()
        print_loss = 0
        # Number of batches accumulated in ``print_loss``. The first report
        # window covers batches 0..print_every (one more than later
        # windows), so the average divides by the real count.
        batches_in_window = 0
        start_time = time.time()
        for batch, training_batch in enumerate(training_batches):
            (attr_input, summary_input, summary_input_lengths, title_input,
             title_input_lengths, target_variable, mask,
             max_target_len) = training_batch

            loss = train(attr_input, summary_input, summary_input_lengths,
                         title_input, title_input_lengths, target_variable,
                         mask, max_target_len, encoder1, encoder2, encoder3,
                         decoder, embedding, encoder1_optimizer,
                         encoder2_optimizer, encoder3_optimizer,
                         decoder_optimizer, batch_size)
            print_loss += loss
            batches_in_window += 1
            perplexity.append(loss)
            if batch % print_every == 0 and batch > 0:
                cur_loss = print_loss / batches_in_window
                elapsed = time.time() - start_time

                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                        epoch, batch, len(training_batches), learning_rate,
                        elapsed * 1000 / batches_in_window, cur_loss,
                        math.exp(cur_loss)))

                print_loss = 0
                batches_in_window = 0
                start_time = time.time()

        # evaluate
        # NOTE(review): the modules are still in train mode here unless
        # ``evaluate`` switches them itself - confirm.
        val_loss = 0
        for val_batch in val_batches:
            (attr_input, summary_input, summary_input_lengths, title_input,
             title_input_lengths, target_variable, mask,
             max_target_len) = val_batch
            loss = evaluate(attr_input, summary_input, summary_input_lengths,
                            title_input, title_input_lengths, target_variable,
                            mask, max_target_len, encoder1, encoder2, encoder3,
                            decoder, embedding, encoder1_optimizer,
                            encoder2_optimizer, encoder3_optimizer,
                            decoder_optimizer, batch_size)
            val_loss += loss
        val_loss /= len(val_batches)

        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch,
                                         (time.time() - epoch_start_time),
                                         val_loss, math.exp(val_loss)))
        print('-' * 89)
        # Save the model if the validation loss is the best we've seen so far.
        # ``is None`` (not truthiness) so a best loss of 0.0 counts as set.
        if best_val_loss is None or val_loss < best_val_loss:
            directory = os.path.join(save_dir, 'model',
                                     '{}_{}'.format(n_layers, hidden_size))
            os.makedirs(directory, exist_ok=True)
            torch.save(
                {
                    'epoch': epoch,
                    'en1': encoder1.state_dict(),
                    'en2': encoder2.state_dict(),
                    'en3': encoder3.state_dict(),
                    'de': decoder.state_dict(),
                    'en1_opt': encoder1_optimizer.state_dict(),
                    'en2_opt': encoder2_optimizer.state_dict(),
                    'en3_opt': encoder3_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    # loss of the last validation batch of this epoch
                    'loss': loss,
                    'plt': perplexity
                },
                os.path.join(
                    directory, '{}_{}.tar'.format(
                        epoch,
                        filename(reverse, 'lexicon_title_expansion_model'))))
            best_val_loss = val_loss

            # Run on test data.
            test_loss = 0
            for test_batch in test_batches:
                (attr_input, summary_input, summary_input_lengths,
                 title_input, title_input_lengths, target_variable, mask,
                 max_target_len) = test_batch
                loss = evaluate(attr_input, summary_input,
                                summary_input_lengths, title_input,
                                title_input_lengths, target_variable, mask,
                                max_target_len, encoder1, encoder2, encoder3,
                                decoder, embedding, encoder1_optimizer,
                                encoder2_optimizer, encoder3_optimizer,
                                decoder_optimizer, batch_size)
                test_loss += loss
            test_loss /= len(test_batches)
            print('-' * 89)
            print('| test loss {:5.2f} | test ppl {:8.2f}'.format(
                test_loss, math.exp(test_loss)))
            print('-' * 89)

        # Early stopping: quit as soon as validation loss gets worse.
        if val_loss > best_val_loss:
            break
Example #8
0
def main():
    """Build (or restore) the chatbot encoder/decoder and start training.

    Loads and trims the Cornell movie-dialogs pairs, builds the models and
    their Adam optimizers on the available device, and hands everything to
    ``trainIters``. Set ``loadFilename`` below to resume from a checkpoint.
    """
    USE_CUDA = torch.cuda.is_available()
    device = torch.device("cuda" if USE_CUDA else "cpu")

    # load data
    corpus_name = "cornell movie-dialogs corpus"
    corpus = os.path.join("data", corpus_name)
    datafile = os.path.join(corpus, "formatted_movie_lines.txt")
    voc, pairs = loadPrepareData(corpus_name, datafile)
    # Trim voc and pairs
    pairs = trimRareWords(voc, pairs, MIN_COUNT)

    # Configure models
    model_name = 'cb_model'
    attn_model = 'dot'
    # attn_model = 'general'
    # attn_model = 'concat'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1
    batch_size = 64

    # Set checkpoint to load from; set to None if starting from scratch
    loadFilename = None
    # checkpoint_iter = 4000
    # loadFilename = os.path.join(save_dir, model_name, corpus_name,
    #                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
    #                            '{}_checkpoint.tar'.format(checkpoint_iter))

    # Load model if a loadFilename is provided
    checkpoint = None
    if loadFilename:
        # Remap every stored tensor onto the device selected above, so a
        # checkpoint written on a GPU machine also loads on a CPU-only host.
        checkpoint = torch.load(loadFilename, map_location=device)
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']
        # Replace the vocabulary's attributes with the ones saved alongside
        # the model.
        voc.__dict__ = checkpoint['voc_dict']

    print('Building encoder and decoder ...')
    # Initialize word embeddings (shared by encoder and decoder)
    embedding = nn.Embedding(voc.num_words, hidden_size)
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Configure training/optimization
    clip = 50.0
    teacher_forcing_ratio = 1.0
    learning_rate = 0.0001
    decoder_learning_ratio = 5.0
    n_iteration = 4000
    print_every = 1
    save_every = 500

    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    # Initialize optimizers (created after the models are on their device)
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    # Run training iterations
    print("Starting Training!")
    save_dir = os.path.join("data", "save")
    trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer,
               decoder_optimizer, embedding, encoder_n_layers,
               decoder_n_layers, save_dir, n_iteration, batch_size,
               print_every, save_every, clip, corpus_name, checkpoint,
               hidden_size, teacher_forcing_ratio, device)
Example #9
0
def _read_pairs(path):
    """Read a tab-separated text file into a list of per-line field lists."""
    with open(path, "r") as file_obj:
        return [line.split("\n")[0].split("\t") for line in file_obj]


def _load_split_voc():
    """Unpickle the vocabulary stored as ``voc.pickle`` under ``split_path``."""
    # NOTE(review): pickle.load can execute arbitrary code; the file must
    # come from a trusted source.
    with open(os.path.join(split_path, 'voc.pickle'), "rb") as f:
        return pickle.load(f)


def main():
    """Entry point: train a seq2seq model, or test / chat with a saved one.

    The module-level ``run_mode`` selects the branch:

    * ``'train'``: build the models and optimizers, call ``run``, save the
      model when ``run`` returns a truthy value, and write the training
      loss/BLEU results.
    * ``'test'``: restore the models from ``args['model_path']``, call
      ``run`` and write train and test results.
    * ``'chat'``: restore the models and start ``chat`` with a
      ``GreedySearchDecoder``.

    Raises:
        ValueError: If the optimizer name (train mode) or ``run_mode`` is
            not one of the supported values.
    """
    phase = {"train": {"pairs": []}, "test": {"pairs": []}}

    if run_mode == 'train':
        phase["train"]["pairs"].extend(_read_pairs(datafiles["qr_train"]))
        phase["train"]["voc"] = _load_split_voc()

        # Shuffle the training pairs ONCE before the entire training
        random.seed(1)  # seed can be any number
        random.shuffle(phase["train"]["pairs"])

        print('Building training set encoder and decoder ...')
        # Initialize word embeddings for both encoder and decoder
        embedding = nn.Embedding(phase["train"]["voc"].num_words,
                                 HIDDEN_SIZE).to(device)

        # Initialize encoder & decoder models
        encoder = EncoderRNN(HIDDEN_SIZE,
                             embedding,
                             ENCODER_N_LAYERS,
                             DROPOUT,
                             gate=encoder_name,
                             bidirectional=BIDIRECTION)
        decoder = LuongAttnDecoderRNN(attn_model,
                                      embedding,
                                      HIDDEN_SIZE,
                                      phase["train"]["voc"].num_words,
                                      DECODER_N_LAYERS,
                                      DROPOUT,
                                      gate=decoder_name)

        # Use appropriate device
        encoder = encoder.to(device)
        decoder = decoder.to(device)
        encoder.train()
        decoder.train()
        print('Models built and ready to go!')

        # Initialize optimizers
        print('Building optimizers ...')
        if args.get('optimizer') == "ADAM":
            encoder_optimizer = optim.Adam(encoder.parameters(),
                                           lr=LR,
                                           weight_decay=WD)
            decoder_optimizer = optim.Adam(decoder.parameters(),
                                           lr=LR,
                                           weight_decay=WD)
        elif args.get('optimizer') == "SGD":
            encoder_optimizer = optim.SGD(encoder.parameters(), lr=LR)
            decoder_optimizer = optim.SGD(decoder.parameters(), lr=LR)
        else:
            raise ValueError(
                "Wrong optimizer type has been given as an argument.")

        # Keep any optimizer state tensors on the same device as the models.
        # ``.to(device)`` instead of ``.cuda()`` so this cannot crash on a
        # CPU-only host (the state is normally still empty at this point).
        for optimizer in [encoder_optimizer, decoder_optimizer]:
            for state in optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.to(device)

        print("Starting Training!")
        save_model = run(encoder,
                         decoder,
                         encoder_optimizer,
                         decoder_optimizer,
                         EPOCH_NUM,
                         BATCH_SIZE,
                         CLIP,
                         phase,
                         evaluation=True)
        if save_model:
            # Best effort: a failed save is reported, but the results below
            # are still written.
            try:
                save_seq2seq(encoder, decoder, encoder_name, decoder_name,
                             encoder_optimizer, decoder_optimizer,
                             phase["train"]["losses"], phase["train"]["bleu"],
                             phase["train"]["voc"], embedding, DROPOUT, CLIP,
                             WD)
                print("Model has been saved successfully.")
            except Exception as error:
                print("Saving the model has caused an exception:", error)

        write_results("loss", "train", encoder, encoder_name, decoder_name,
                      DROPOUT, CLIP, WD, phase["train"]["losses"])
        write_results("bleu", "train", encoder, encoder_name, decoder_name,
                      DROPOUT, CLIP, WD, phase["train"]["bleu"])

    else:
        # Loading basic objects needed for all 3 of validation, testing and
        # chatting. Stored tensors are remapped onto ``device`` so a
        # checkpoint written on a GPU machine also loads on a CPU-only host.
        checkpoint = torch.load(args.get('model_path'), map_location=device)
        embedding = load_embedding(checkpoint, HIDDEN_SIZE)
        encoder = load_encoder(checkpoint, EncoderRNN, HIDDEN_SIZE, embedding,
                               ENCODER_N_LAYERS, DROPOUT, encoder_name,
                               BIDIRECTION)
        voc = load_voc(checkpoint)
        decoder = load_decoder(checkpoint, LuongAttnDecoderRNN, attn_model,
                               embedding, HIDDEN_SIZE, voc.num_words,
                               DECODER_N_LAYERS, DROPOUT, decoder_name)
        encoder = encoder.to(device)
        decoder = decoder.to(device)

        if run_mode == "test":
            phase["train"]["pairs"].extend(_read_pairs(datafiles["qr_train"]))
            phase["test"]["pairs"].extend(_read_pairs(datafiles["qr_test"]))
            phase["train"]["voc"] = _load_split_voc()
            # No optimizers are passed in test mode.
            _ = run(encoder,
                    decoder,
                    None,
                    None,
                    EPOCH_NUM,
                    BATCH_SIZE,
                    CLIP,
                    phase,
                    evaluation=True)
            write_results("loss", "train", encoder, encoder_name, decoder_name,
                          DROPOUT, CLIP, WD, phase["train"]["losses"])
            write_results("bleu", "train", encoder, encoder_name, decoder_name,
                          DROPOUT, CLIP, WD, phase["train"]["bleu"])

            write_results("loss", "test", encoder, encoder_name, decoder_name,
                          DROPOUT, CLIP, WD, phase["test"]["losses"])
            write_results("bleu", "test", encoder, encoder_name, decoder_name,
                          DROPOUT, CLIP, WD, phase["test"]["bleu"])

        elif run_mode == "chat":
            # Initialize search module
            searcher = GreedySearchDecoder(encoder, decoder)
            chat(searcher, voc)

        else:
            raise ValueError(
                "Wrong run_mode has been given, options: ['train', 'test', 'chat']"
            )