Example #1
# Imports needed by this snippet (Encoder, KeplerDataset, and the config dict come from the surrounding project)
import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from sklearn.metrics import balanced_accuracy_score, f1_score
class KeplerModel(pl.LightningModule):
    def __init__(self):
        super(KeplerModel, self).__init__()

        #Initialize Model Parameters Using Config Properties
        self.model = Encoder(config['seq_length'], config['hidden_size'],
                             config['output_dim'], config['n_layers'])

        #Initialize a Cross Entropy Loss Criterion for Training
        self.criterion = torch.nn.CrossEntropyLoss()

    #Define a Forward Pass of the Model
    def forward(self, x, h):
        return self.model.forward(x, h)

    def training_step(self, batch, batch_idx):

        #Set Model to Training Mode
        self.model.train()

        #Unpack Data and Labels from Batch
        x, y = batch

        #Reshape Data into Shape (batch_size, 1, seq_length)
        x = x.view(x.size(0), -1, x.size(1))

        #Initialize the hidden state for the forward pass
        h = self.model.init_hidden(x.size(0))

        #Zero out the model gradients to avoid accumulation
        self.model.zero_grad()

        #Forward Pass Through Model
        out, h = self.forward(x, h)

        #Calculate Cross Entropy Loss
        loss = self.criterion(out, y.long().squeeze())

        #Obtain Class Labels
        y_hat = torch.max(out, 1)[1]

        #Compute the balanced accuracy (weights based on number of ex. in each class)
        accuracy = balanced_accuracy_score(y, y_hat)

        #Compute weighted f1 score to account for class imbalance
        f1 = f1_score(y, y_hat, average='weighted')

        #Create metric object for tensorboard logging
        tensorboard_logs = {
            'train_loss': loss.item(),
            'accuracy': accuracy,
            'f1': f1
        }

        return {'loss': loss, 'log': tensorboard_logs}

    def validation_step(self, batch, batch_idx):

        #Set Model to Eval Mode
        self.model.eval()

        #Unpack data and labels from batch
        x, y = batch

        #Initialize Hidden State
        h = self.model.init_hidden(x.size(0))

        #Reshape Data into Shape (batch_size, 1, seq_length)
        x = x.view(x.size(0), -1, x.size(1))

        #Calculate Forward Pass of The Model
        out, h = self.forward(x, h)

        #Calculate Cross Entropy Loss
        loss = self.criterion(out, y.long().squeeze())

        #Calculate Class Indices
        y_hat = torch.max(out, 1)[1]

        #Calculate Balanced Accuracy
        val_accuracy = torch.Tensor([balanced_accuracy_score(y, y_hat)])

        #Calculate Weighted F1 Score
        val_f1 = torch.Tensor([f1_score(y, y_hat, average='weighted')])

        #Create a metrics object
        metrics = {
            'val_loss': loss,
            'val_accuracy': val_accuracy,
            'val_f1': val_f1
        }

        return metrics

    def validation_end(self, outputs):
        # OPTIONAL
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['val_accuracy'] for x in outputs]).mean()
        avg_f1 = torch.stack([x['val_f1'] for x in outputs]).mean()

        tensorboard_logs = {
            'val_loss': avg_loss,
            'val_acc': avg_acc,
            'val_f1': avg_f1
        }

        return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.001)

    @pl.data_loader
    def train_dataloader(self):
        # REQUIRED
        return DataLoader(KeplerDataset(mode="train"),
                          batch_size=64,
                          shuffle=True)

    @pl.data_loader
    def val_dataloader(self):
        # REQUIRED
        return DataLoader(KeplerDataset(mode="test"),
                          batch_size=128,
                          shuffle=True)
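
A minimal usage sketch, not part of the original example: assuming the config dict, Encoder, and KeplerDataset referenced above are importable, an older PyTorch Lightning release that still provides the @pl.data_loader decorator could train this module roughly as follows (max_epochs is illustrative only).

import pytorch_lightning as pl

if __name__ == '__main__':
    # Hypothetical launcher for the KeplerModel defined above
    model = KeplerModel()
    trainer = pl.Trainer(max_epochs=10)
    trainer.fit(model)
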
Example #2
# Imports needed by this snippet (config, device, AverageMeter, build_and_save_vocab, get_loader,
# Encoder, Decoder, adjust_learning_rate, accuracy, calculate_caption_lengths, and validate
# come from the surrounding project)
import os
import numpy as np
import torch
import torch.nn as nn
def train():
    model_path = config.get('model_path', './model/')
    log_step = config.get('log_step', 10)
    hidden_size = config.get('decoder_hidden_size', 512)
    num_epochs = config.get('num_epochs', 5)
    alpha_c = config.get('alpha_c', 1)

    # Create model directory
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Used for calculating bleu scores
    references = []
    hypotheses = []

    # Load vocabulary
    vocab = build_and_save_vocab()

    # Build data loader
    data_loader = get_loader('train')
    data_loader_valid = get_loader('validate')

    # Build the models
    if config.get('checkpoint') is None:
        epochs_since_improvement = config.get('epochs_since_improvement', 0)
        best_score = 0.
        encoder = Encoder(config.get('image_net')).to(device)
        decoder = Decoder(encoder.dim, len(vocab),
                          hidden_size=hidden_size).to(device)

        encoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, encoder.parameters()),
                                             lr=config.get('encoder_lr', 1e-4))

        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=config.get('decoder_lr', 1e-3))
    else:
        checkpoint = torch.load(config.get('checkpoint'))
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_score = checkpoint['best_score']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()

    # Train the models
    total_step = len(data_loader)
    for epoch in range(num_epochs):
        if epochs_since_improvement == 20:
            print(
                'Reached the max epochs_since_improvement. Training is done.')
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.9)
            adjust_learning_rate(encoder_optimizer, 0.9)

        for i, (images, captions, lengths) in enumerate(data_loader):

            # Set mini-batch dataset
            images = images.to(device)
            captions = captions.to(device)

            # Forward, backward and optimize
            features = encoder.forward(images)
            prediction, alphas = decoder.forward(features, captions)

            att_regularization = alpha_c * ((1 - alphas.sum(1))**2).mean()
            loss = criterion(prediction.permute(0, 2, 1),
                             captions) + att_regularization

            decoder_optimizer.zero_grad()
            encoder_optimizer.zero_grad()

            loss.backward()

            decoder_optimizer.step()
            encoder_optimizer.step()

            total_caption_length = calculate_caption_lengths(
                vocab.word2idx, captions)
            acc1 = accuracy(prediction.permute(0, 2, 1), captions, 1)
            acc5 = accuracy(prediction.permute(0, 2, 1), captions, 5)
            losses.update(loss.item(), total_caption_length)
            top1.update(acc1, total_caption_length)
            top5.update(acc5, total_caption_length)

            # Print log info
            if i % log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, num_epochs, i, total_step, loss.item(),
                            np.exp(loss.item())))
                print(
                    'Top 1 Accuracy {top1.val:.3f} ({top1.avg:.3f}), Top 5 Accuracy {top5.val:.3f} ({top5.avg:.3f})'
                    .format(top1=top1, top5=top5))

            valid_score = validate(data_loader_valid, encoder, decoder,
                                   criterion, vocab)
            # Save a checkpoint when the validation score improves; otherwise
            # track how many checks have passed without improvement
            if valid_score >= best_score:
                best_score = valid_score
                epochs_since_improvement = 0

                state_dict = {
                    'epoch': epoch,
                    'epochs_since_improvement': epochs_since_improvement,
                    'decoder': decoder,
                    'decoder_optimizer': decoder_optimizer,
                    'encoder': encoder,
                    'encoder_optimizer': encoder_optimizer,
                    'valid_score': valid_score,
                    'best_score': best_score
                }

                filename = 'checkpoint.pth.tar'
                torch.save(state_dict, filename)
            else:
                epochs_since_improvement += 1
                print(f'Epochs since last improvement: {epochs_since_improvement}')

            # Print log info
            if i % log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, num_epochs, i, total_step, loss.item(),
                            np.exp(loss.item())))
                print(
                    'Top 1 Accuracy {top1.val:.3f} ({top1.avg:.3f}), Top 5 Accuracy {top5.val:.3f} ({top5.avg:.3f})'
                    .format(top1=top1, top5=top5))
                print('Validate score %.3f' % (valid_score))
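
The log format above reads top1.val and top1.avg, which matches the common AverageMeter helper from the PyTorch ImageNet example; the exact class used here is not shown in the snippet, but a minimal sketch of such a helper looks like this.

class AverageMeter:
    """Tracks the most recent value and a running, count-weighted average."""

    def __init__(self):
        self.val = 0.0    # last value passed to update()
        self.sum = 0.0    # weighted sum of all values seen so far
        self.count = 0    # total weight seen so far
        self.avg = 0.0    # running average = sum / count

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
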
Example #3
# Imports needed by this snippet (config, opt, initialize_emb, Encoder, Decoder, AttnNet, MyDataset,
# my_collate, my_collate_1, evaluate, and evaluate_cla come from the surrounding project)
import itertools
import logging
import os
import time
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
def train(train_ids, dev_ids, test_ids, dict_ids, enc_word_alphabet, enc_char_alphabet, dec_word_alphabet, dec_char_alphabet, position_alphabet,
          dictionary):

    enc_word_emb = initialize_emb(config.get('word_emb'), enc_word_alphabet, opt.word_emb_dim)
    if position_alphabet is not None:
        pos_emb = initialize_emb(None, position_alphabet, opt.pos_emb_dim)
    else:
        pos_emb = None
    if opt.use_char:
        enc_char_emb = initialize_emb(config.get('char_emb'), enc_char_alphabet, opt.char_emb_dim)
    else:
        enc_char_emb = None

    encoder = Encoder(enc_word_emb, pos_emb, enc_char_emb)

    if opt.method == 'cla':
        decoder = AttnNet(dictionary)

        train_loader = DataLoader(MyDataset(train_ids), opt.batch_size, shuffle=True, collate_fn=my_collate)
    else:
        dec_word_emb = initialize_emb(config.get('word_emb'), dec_word_alphabet, opt.word_emb_dim)
        if opt.use_char:
            dec_char_emb = initialize_emb(config.get('char_emb'), dec_char_alphabet, opt.char_emb_dim)
        else:
            dec_char_emb = None

        decoder = Decoder(dec_word_emb, dec_char_emb, dec_word_alphabet)


        if opt.batch_size != 1:
            raise RuntimeError("currently, only support batch size 1")
        train_loader = DataLoader(MyDataset(train_ids), opt.batch_size, shuffle=True, collate_fn=my_collate_1)

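    # One Adam optimizer over the chained encoder and decoder parameters, with L2 regularization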
    optimizer = optim.Adam(itertools.chain(encoder.parameters(), decoder.parameters()), lr=opt.lr, weight_decay=opt.l2)

    # Freeze the embeddings when they should not be fine-tuned
    if not opt.tune_wordemb:
        encoder.free_emb()
        decoder.free_emb()

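    # Optional pretraining stage on dictionary entries before the main training loop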
    if opt.pretraining:

        if opt.method == 'cla':
            dict_loader = DataLoader(MyDataset(dict_ids), opt.batch_size, shuffle=True, collate_fn=my_collate)
        else:
            dict_loader = DataLoader(MyDataset(dict_ids), opt.batch_size, shuffle=True, collate_fn=my_collate_1)

        logging.info("start dict pretraining ...")
        logging.info("dict pretraining datapoints: {}".format(len(dict_ids)))

        bad_counter = 0
        best_accuracy = 0

        for idx in range(9999):
            epoch_start = time.time()

            encoder.train()
            decoder.train()

            correct, total = 0, 0

            sum_loss = 0

            train_iter = iter(dict_loader)
            num_iter = len(dict_loader)

            for i in range(num_iter):

                if opt.method == 'cla':
                    enc_word_seq_tensor, enc_word_seq_lengths, enc_word_seq_recover, enc_mask, \
                    enc_char_seq_tensor, enc_char_seq_lengths, enc_char_seq_recover, label_tensor = next(train_iter)

                    encoder_outputs, _ = encoder.forward_batch(enc_word_seq_tensor, enc_word_seq_lengths, \
                                                                      enc_char_seq_tensor, enc_char_seq_lengths,
                                                                      enc_char_seq_recover)

                    loss, total_this_batch, correct_this_batch = decoder.forward_train(encoder_outputs, enc_word_seq_lengths, label_tensor)

                else:

                    enc_word_seq_tensor, enc_pos_tensor, \
                    enc_char_seq_tensor, enc_char_seq_lengths, enc_char_seq_recover, dec_word_seq_tensor, \
                    label_tensor, dec_char_seq_tensor = next(train_iter)

                    encoder_outputs, encoder_hidden = encoder.forward(enc_word_seq_tensor, enc_pos_tensor, \
                                                                      enc_char_seq_tensor, enc_char_seq_lengths,
                                                                      enc_char_seq_recover)

                    loss, total_this_batch, correct_this_batch = decoder.forward_train(encoder_outputs, encoder_hidden,
                                                                                       dec_word_seq_tensor, label_tensor,
                                                                                       dec_char_seq_tensor)

                sum_loss += loss.item()

                loss.backward()

                if opt.gradient_clip > 0:
                    torch.nn.utils.clip_grad_norm_(encoder.parameters(), opt.gradient_clip)
                    torch.nn.utils.clip_grad_norm_(decoder.parameters(), opt.gradient_clip)
                optimizer.step()
                encoder.zero_grad()
                decoder.zero_grad()

                total += total_this_batch
                correct += correct_this_batch

            epoch_finish = time.time()
            accuracy = 100.0 * correct / total
            logging.info("epoch: %s pretraining finished. Time: %.2fs. loss: %.4f Accuracy %.2f" % (
                idx, epoch_finish - epoch_start, sum_loss / num_iter, accuracy))

            if accuracy > opt.expected_accuracy:
                logging.info("Exceed expected training accuracy, breaking ... ")
                break

            if accuracy > best_accuracy:
                logging.info("Exceeded previous best accuracy: %.2f" % (best_accuracy))
                best_accuracy = accuracy

                bad_counter = 0
            else:
                bad_counter += 1

            if bad_counter >= opt.patience:
                logging.info('Pretraining Early Stop!')
                break


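    # Main training loop: keep the best model by dev F1 and stop early after opt.patience bad epochs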
    best_dev_f = -10

    bad_counter = 0

    logging.info("start training ...")
    logging.info("training datapoints: {}".format(len(train_ids)))
    if dev_ids is not None and len(dev_ids) != 0:
        logging.info("dev datapoints: {}".format(len(dev_ids)))
    if test_ids is not None and len(test_ids) != 0:
        logging.info("test datapoints: {}".format(len(test_ids)))

    for idx in range(opt.iter):
        epoch_start = time.time()

        encoder.train()
        decoder.train()

        train_iter = iter(train_loader)
        num_iter = len(train_loader)

        sum_loss = 0

        correct, total = 0, 0

        for i in range(num_iter):
            if opt.method == 'cla':
                enc_word_seq_tensor, enc_word_seq_lengths, enc_word_seq_recover, enc_mask, \
                enc_char_seq_tensor, enc_char_seq_lengths, enc_char_seq_recover, label_tensor = next(train_iter)

                encoder_outputs, _ = encoder.forward_batch(enc_word_seq_tensor, enc_word_seq_lengths, \
                                                           enc_char_seq_tensor, enc_char_seq_lengths,
                                                           enc_char_seq_recover)

                loss, total_this_batch, correct_this_batch = decoder.forward_train(encoder_outputs,
                                                                                   enc_word_seq_lengths, label_tensor)
            else:
                enc_word_seq_tensor, enc_pos_tensor, \
                enc_char_seq_tensor, enc_char_seq_lengths, enc_char_seq_recover, dec_word_seq_tensor, \
                label_tensor, dec_char_seq_tensor = next(train_iter)

                encoder_outputs, encoder_hidden = encoder.forward(enc_word_seq_tensor, enc_pos_tensor, \
                    enc_char_seq_tensor, enc_char_seq_lengths, enc_char_seq_recover)


                loss, total_this_batch, correct_this_batch = decoder.forward_train(encoder_outputs, encoder_hidden,
                                                                  dec_word_seq_tensor, label_tensor,
                                                                  dec_char_seq_tensor)

            sum_loss += loss.item()

            loss.backward()

            if opt.gradient_clip > 0:
                torch.nn.utils.clip_grad_norm_(encoder.parameters(), opt.gradient_clip)
                torch.nn.utils.clip_grad_norm_(decoder.parameters(), opt.gradient_clip)
            optimizer.step()
            encoder.zero_grad()
            decoder.zero_grad()

            total += total_this_batch
            correct += correct_this_batch

        epoch_finish = time.time()
        accuracy = 100.0 * correct / total
        logging.info("epoch: %s training finished. Time: %.2fs. loss: %.4f Accuracy %.2f" % (
            idx, epoch_finish - epoch_start, sum_loss / num_iter, accuracy))


        if dev_ids is not None and len(dev_ids) != 0:
            if opt.method == 'cla':
                p, r, f = evaluate_cla(dev_ids, encoder, decoder, dictionary)
            else:
                p, r, f = evaluate(dev_ids, encoder, decoder, dec_word_alphabet, dec_char_alphabet, dictionary)
            logging.info("Dev: p: %.4f, r: %.4f, f: %.4f" % (p, r, f))
        else:
            f = best_dev_f

        if f > best_dev_f:
            logging.info("Exceeded previous best f score on dev: %.4f" % (best_dev_f))

            best_dev_f = f

            bad_counter = 0

            torch.save(encoder, os.path.join(opt.output, "encoder.pkl"))
            torch.save(decoder, os.path.join(opt.output, "decoder.pkl"))
            torch.save(enc_word_alphabet, os.path.join(opt.output, "enc_word_alphabet.pkl"))
            torch.save(enc_char_alphabet, os.path.join(opt.output, "enc_char_alphabet.pkl"))
            torch.save(dec_word_alphabet, os.path.join(opt.output, "dec_word_alphabet.pkl"))
            torch.save(dec_char_alphabet, os.path.join(opt.output, "dec_char_alphabet.pkl"))
            torch.save(position_alphabet, os.path.join(opt.output, "position_alphabet.pkl"))

            if test_ids is not None and len(test_ids) != 0:
                if opt.method == 'cla':
                    p, r, f = evaluate_cla(test_ids, encoder, decoder, dictionary)
                else:
                    p, r, f = evaluate(test_ids, encoder, decoder, dec_word_alphabet, dec_char_alphabet, dictionary)
                logging.info("Test: p: %.4f, r: %.4f, f: %.4f" % (p, r, f))

        else:
            bad_counter += 1

        if bad_counter >= opt.patience:
            logging.info('Early Stop!')
            break

    logging.info("train finished")
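
Because this example saves whole module and alphabet objects with torch.save rather than state_dicts, a later inference script could reload them directly. A hypothetical sketch, assuming the same opt.output directory, that the class definitions are importable, and a PyTorch version that permits full unpickling (newer releases may require weights_only=False):

import os
import torch

encoder = torch.load(os.path.join(opt.output, "encoder.pkl"))
decoder = torch.load(os.path.join(opt.output, "decoder.pkl"))
dec_word_alphabet = torch.load(os.path.join(opt.output, "dec_word_alphabet.pkl"))
dec_char_alphabet = torch.load(os.path.join(opt.output, "dec_char_alphabet.pkl"))

# Switch to evaluation mode before decoding
encoder.eval()
decoder.eval()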