Example #1
import math
import os
import sys

import torch
import torch.nn as nn

# Project-specific helpers referenced below (defined elsewhere in this
# codebase): Params, DataUtil, EvaluateUtil, and the per-batch train().


def trainIters(model_name, inputVoc, outputVoc, pairs, dev_pairs,
               encoder, decoder, encoder_optimizer, decoder_optimizer,
               input_embedding, output_embedding, encoder_n_layers, decoder_n_layers,
               save_dir, n_iteration, batch_size, print_every, check_every, clip, corpus_name, loadFilename):

    # Mask the padding token out of the loss; ignore_index already excludes
    # PAD targets, so no explicit weight tensor is needed.
    criterion = nn.NLLLoss(ignore_index=Params.PAD_token)
    # Equivalent alternative with an explicit per-class weight:
    # weight = torch.ones(outputVoc.num_words)
    # weight[Params.PAD_token] = 0
    # criterion = nn.NLLLoss(weight=weight, ignore_index=Params.PAD_token)

    # Pre-build all training batches once; the training loop below cycles
    # through them by index.
    training_batches = []
    batch_num = int(math.ceil(len(pairs) / batch_size))
    print("Batch Number (Train):", batch_num)
    for i in range(batch_num):
        batch_data = DataUtil.batch2TrainData(inputVoc, outputVoc, pairs[i * batch_size: (i + 1) * batch_size])
        training_batches.append(batch_data)

    # Dev batches are used only for perplexity evaluation at checkpoints.
    dev_batches = []
    dev_batch_num = int(math.ceil(len(dev_pairs) / batch_size))
    for i in range(dev_batch_num):
        dev_batch_data = DataUtil.batch2TrainData(inputVoc, outputVoc, dev_pairs[i * batch_size: (i + 1) * batch_size])
        dev_batches.append(dev_batch_data)

    # Initializations
    print('Initializing ...')
    start_iteration = 1
    if loadFilename:
        # Resume from a saved checkpoint; the encoder/decoder/optimizer states
        # are assumed to have been restored by the caller, so only the
        # iteration counter is picked up here.
        checkpoint = torch.load(loadFilename)
        start_iteration = checkpoint['iteration'] + 1

    print_loss = 0
    larger_count = 0  # checkpoints since the dev perplexity last improved
    best_dev_ppl = sys.maxsize

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[(iteration - 1) % batch_num]
        # Extract fields from batch
        input_variable, lengths, target_variable, max_target_len = training_batch

        # Run a training iteration with batch
        loss = train(input_variable, lengths, target_variable, max_target_len, criterion,
                     encoder, decoder, encoder_optimizer, decoder_optimizer, clip)

        print_loss += loss

        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}".format(iteration, iteration / n_iteration * 100, print_loss_avg))
            print_loss = 0

        # Periodically save a checkpoint and evaluate on the dev set
        if iteration % check_every == 0:
            directory = os.path.join(save_dir, model_name, corpus_name,
                                     '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, Params.hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)

            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'input_voc_dict': inputVoc.__dict__,
                'output_voc_dict': outputVoc.__dict__,
                'input_embedding': input_embedding.state_dict(),
                'output_embedding': output_embedding.state_dict()
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))

            # Disable dropout etc. while measuring dev-set perplexity
            encoder.eval()
            decoder.eval()

            dev_ppl = EvaluateUtil.calc_ppl(encoder, decoder, outputVoc.num_words, dev_batches, Params.PAD_token)

            if dev_ppl < best_dev_ppl:
                best_dev_ppl = dev_ppl

                # Also keep a copy of the best-so-far model under a fixed name
                torch.save({
                    'iteration': iteration,
                    'en': encoder.state_dict(),
                    'de': decoder.state_dict(),
                    'en_opt': encoder_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    'loss': loss,
                    'input_voc_dict': inputVoc.__dict__,
                    'output_voc_dict': outputVoc.__dict__,
                    'input_embedding': input_embedding.state_dict(),
                    'output_embedding': output_embedding.state_dict()
                }, os.path.join(directory, '{}.tar'.format('best_ppl')))

                larger_count = 0

            else:
                larger_count += 1

            print("#CHECK POINT# Iteration: {}; Best PPL: {:.4f}; Current PPL: {:.4f}; Larger count: {}".format(iteration, best_dev_ppl, dev_ppl, larger_count))

            # Back to training mode
            encoder.train()
            decoder.train()

        # Early stopping: give up after too many checkpoints without dev improvement
        if larger_count > Params.break_count:
            print("BREAK: dev perplexity has not improved for {} checkpoints".format(larger_count))
            break
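
For context, here is a minimal sketch of how this function might be driven. EncoderRNN, LuongAttnDecoderRNN, the Adam optimizers, and all hyperparameter values below are illustrative assumptions, not definitions taken from the snippet (the code closely follows the PyTorch chatbot tutorial, so similar classes likely exist in this codebase):

# Hypothetical driver; every class name and value below is a placeholder.
input_embedding = nn.Embedding(inputVoc.num_words, Params.hidden_size)
output_embedding = nn.Embedding(outputVoc.num_words, Params.hidden_size)
encoder = EncoderRNN(Params.hidden_size, input_embedding, n_layers=2)   # assumed class
decoder = LuongAttnDecoderRNN(output_embedding, Params.hidden_size,
                              outputVoc.num_words, n_layers=2)          # assumed class
encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=1e-4)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=1e-4)

trainIters('seq2seq', inputVoc, outputVoc, train_pairs, dev_pairs,
           encoder, decoder, encoder_optimizer, decoder_optimizer,
           input_embedding, output_embedding, encoder_n_layers=2, decoder_n_layers=2,
           save_dir='checkpoints', n_iteration=10000, batch_size=64,
           print_every=100, check_every=500, clip=50.0,
           corpus_name='my_corpus', loadFilename=None)

To resume training or deploy a saved model, the checkpoint written above can be read back with the same keys it was saved under (path pieces here assume the hypothetical values from the driver sketch):

# Restore from the best-dev-perplexity checkpoint written by trainIters
checkpoint = torch.load(os.path.join('checkpoints', 'seq2seq', 'my_corpus',
                                     '2-2_{}'.format(Params.hidden_size), 'best_ppl.tar'))
encoder.load_state_dict(checkpoint['en'])
decoder.load_state_dict(checkpoint['de'])
encoder_optimizer.load_state_dict(checkpoint['en_opt'])
decoder_optimizer.load_state_dict(checkpoint['de_opt'])
input_embedding.load_state_dict(checkpoint['input_embedding'])
output_embedding.load_state_dict(checkpoint['output_embedding'])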