Example #1
def train(dim, args):
    import os
    from os.path import join

    import torch
    from torch import nn, optim
    import numpy as np
    from features import ExtractWordEmbeddings
    # loadDatasetForLSTM is assumed to live in preprocess_data alongside the
    # other helpers; adjust the import if it is defined elsewhere
    from preprocess_data import batchify, padBatch, loadDatasetForLSTM
    from models.lstm import LSTMClassifier
    from sklearn.utils import shuffle

    # hyperparameters
    embedding_dim = 300  # changes only with different word embeddings
    hidden_dim = args.hidden_dim
    max_epochs = args.max_epochs
    is_cuda = torch.cuda.is_available()  # fall back to CPU when no GPU is present
    batch_size = 60
    lr = args.lr
    n_decreases = 10
    save_dir = 'weights/LSTM/%s' % dim
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    """
    Loading train / validation datasets
    X_tr: a list of tokenized sentences
    y_tr: a list of 0 and 1
    """
    X_tr, y_tr = loadDatasetForLSTM(dim, 'train')
    X_d, y_d = loadDatasetForLSTM(dim, 'dev')

    # load model and settings for training
    model = LSTMClassifier(embedding_dim=embedding_dim, hidden_dim=hidden_dim)
    if is_cuda:
        model.cuda()
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    flag = True
    old_val = np.inf  # best (lowest) validation loss seen so far
    em = ExtractWordEmbeddings(emb_type='glove')
    loss_fn = nn.BCELoss()

    # train model
    epoch = 0
    cnt_decrease = 0
    while flag:
        tr_loss = 0.0
        epoch += 1
        if epoch > max_epochs or cnt_decrease > n_decreases:
            break
        # train
        model.train()
        # at the start of each epoch, shuffle X_tr and y_tr together, then split into batches
        X_tr, y_tr = shuffle(X_tr, y_tr)
        tr_batches = batchify(X_tr, y_tr, batch_size)
        for X_b, y_b in tr_batches:
            # X_b is still a list of tokenized sentences (list of list of words)
            optimizer.zero_grad()
            """
            obtain_vectors_from_sentence(sent=list of words, include_unk=True)
            : changes each word into an embedding, and returns a list of embeddings
            padBatch(list of embedding lists, max_seq=None)
            : for each batch, returns a tensor fixed to the max size, applies zero padding
            """
            inputs = torch.tensor(
                padBatch([
                    em.obtain_vectors_from_sentence(sent, True) for sent in X_b
                ])).float()
            # inputs is now a tensor of shape (batch_size, seq_len, embedding_dim)
            targets = torch.tensor(y_b, dtype=torch.float32)
            if is_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            loss = loss_fn(outputs.view(-1), targets)  # flatten outputs to (B,) so BCELoss sees matching shapes
            loss.backward()
            tr_loss += loss.item()
            optimizer.step()

        print("[Epoch %d] train loss: %1.3f" % (epoch, tr_loss))

        # validate
        model.eval()
        current_loss = 0.0
        X_d, y_d = shuffle(X_d, y_d)
        val_batches = batchify(X_d, y_d, batch_size)
        with torch.no_grad():
            for X_b, y_b in val_batches:
                inputs = torch.tensor(
                    padBatch([
                        em.obtain_vectors_from_sentence(sent, True)
                        for sent in X_b
                    ])).float()
                targets = torch.tensor(y_b, dtype=torch.float32)
                if is_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = model(inputs)
                loss = loss_fn(outputs.view(-1), targets)  # same shape fix as in training
                current_loss += loss.item()

        print("[Epoch %d] validation loss: %1.3f" % (epoch, current_loss))
        if current_loss < old_val:
            # validation loss improved over the best so far: checkpoint the model
            best_state = model.state_dict()
            torch.save(best_state, join(save_dir, 'best-weights.pth'))
            print("Updated model")
            old_val = current_loss
            cnt_decrease = 0
        else:
            # no improvement this epoch
            cnt_decrease += 1

        if cnt_decrease >= n_decreases:
            flag = False
    return
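
The helpers imported from preprocess_data above are only described informally in the docstrings. Below is a minimal sketch of what batchify and padBatch could look like, assuming padBatch zero-pads each sentence's embedding list to the longest sequence in the batch; the bodies are illustrative stand-ins, not the repository's actual implementations.

import numpy as np

def batchify(X, y, batch_size):
    # split the aligned lists X and y into consecutive mini-batches
    return [(X[i:i + batch_size], y[i:i + batch_size])
            for i in range(0, len(X), batch_size)]

def padBatch(emb_lists, max_seq=None):
    # zero-pad variable-length lists of word vectors into one
    # (batch, max_len, dim) array, as the training loop above expects
    max_len = max_seq or max(len(e) for e in emb_lists)
    dim = len(emb_lists[0][0])
    out = np.zeros((len(emb_lists), max_len, dim), dtype=np.float32)
    for i, emb in enumerate(emb_lists):
        out[i, :min(len(emb), max_len)] = np.asarray(emb[:max_len], dtype=np.float32)
    return out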
Example #2
        else:
            train_with_teacher_logits(trainloader,
                                      student,
                                      target_logist_list,
                                      Config.temp,
                                      optimizer,
                                      scheduler=scheduler,
                                      device=device)

        # evaluate it
        valid_acc = evalation(validloader, student, device=device, tag="Valid")
        test_acc = evalation(testloader, student, device=device, tag="Test")

        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            best_student = student.state_dict()
            corresp_test_acc = test_acc

            saving_dict = {
                'epoch': epoch,
                'state_dict': best_student,
                'best_valid_acc': best_valid_acc,
                'corresp_test_acc': corresp_test_acc
            }

            torch.save(
                saving_dict,
                Config.student_tar_fmt.format(plbl=Config.part_labeled,
                                              temp=Config.temp))

    print("Finish training student!")
Example #3
            # move predictions to the CPU so they can be converted to a NumPy array
            predictions = np.array(predictions.cpu())

            all_predictions = np.concatenate((all_predictions, predictions))

        # Get results
        preprocess.plot_confusion_matrix(all_targets,
                                         all_predictions,
                                         classes=list(labels_dict.keys()),
                                         epoch=e,
                                         model_code=conf['code'])
        performance = preprocess.evaluate(all_targets, all_predictions)
        writer.add_scalars('metrics/performance', performance, iter)
        print('Test A: {acc} | P: {precision} | R: {recall} | F: {f1}\n\n'.
              format(**performance))
        if performance['f1'] > best_f1:
            best_f1 = performance['f1']
            # save model and results
            torch.save(
                {
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, 'saved_models/{}_{}_best_model.pth'.format(
                    conf['code'], conf['operation']))

            with open(
                    'saved_models/{}_{}_best_performance.pkl'.format(
                        conf['code'], conf['operation']), 'wb') as f:
                pickle.dump(performance, f)
writer.close()
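
preprocess.evaluate must return a dict with the keys acc, precision, recall, and f1, since both the format string and the add_scalars call above consume exactly those keys. A minimal sketch of such a function built on scikit-learn; this is a hypothetical stand-in for the project's actual implementation, and the macro averaging is an assumption:

from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def evaluate(targets, predictions):
    # compute the four metrics the training loop's format string expects
    precision, recall, f1, _ = precision_recall_fscore_support(
        targets, predictions, average='macro', zero_division=0)
    return {
        'acc': accuracy_score(targets, predictions),
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }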