Example #1
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

# Trainer, LSTMClassifier, torch_utils, and unpack_batch are assumed to be
# defined elsewhere in the project.
class LSTMTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = LSTMClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])
    
    def update(self, batch):
        inputs, labels = unpack_batch(batch)

        # Step 1 init and forward
        self.model.train()
        self.optimizer.zero_grad()

        logits = self.model(inputs)
        loss = self.criterion(logits, labels)
        loss_val = loss.item()

        # Step 2 backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])

        # Step 3 update
        self.optimizer.step()
        return loss_val 
    
    def predict(self, batch, unsort=True):
        inputs, labels = unpack_batch(batch)
        
        self.model.eval()
        with torch.no_grad():
            logits = self.model(inputs)
            loss = self.criterion(logits, labels)
            loss_val = loss.item()

            probs = F.softmax(logits, dim=1).cpu().numpy().tolist()
            predictions = np.argmax(logits.cpu().numpy(), axis=1).tolist()
            labels = labels.cpu().numpy().tolist()
        return predictions, probs, labels, loss_val
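
The trainer above can be driven with a simple loop. The sketch below is illustrative and not part of the original code: the option values, the emb_matrix variable, and the train_batches / dev_batches iterables (anything unpack_batch can consume) are assumptions.

# Illustrative usage sketch; train_batches, dev_batches, and emb_matrix are
# hypothetical placeholders, and the option values are only examples.
opt = {
    'cuda': torch.cuda.is_available(),
    'optim': 'adam',            # whatever name torch_utils.get_optimizer accepts
    'lr': 1e-3,
    'max_grad_norm': 5.0,
}
trainer = LSTMTrainer(opt, emb_matrix=emb_matrix)

for epoch in range(1, 11):
    # training pass
    train_loss = sum(trainer.update(batch) for batch in train_batches) / len(train_batches)

    # evaluation pass on the dev set
    correct, total = 0, 0
    for batch in dev_batches:
        preds, probs, labels, _ = trainer.predict(batch)
        correct += sum(p == l for p, l in zip(preds, labels))
        total += len(labels)
    print("epoch %d | train loss %.4f | dev acc %.4f" % (epoch, train_loss, correct / total))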
Example #2
def train(dim, args):
    import os
    from os.path import join

    import numpy as np
    import torch
    from torch import nn, optim
    from sklearn.utils import shuffle

    from features import ExtractWordEmbeddings
    from preprocess_data import batchify, padBatch
    from models.lstm import LSTMClassifier
    # loadDatasetForLSTM is assumed to be imported or defined elsewhere in the project.

    # hyperparameters
    embedding_dim = 300  # changes only with different word embeddings
    hidden_dim = args.hidden_dim
    max_epochs = args.max_epochs
    is_cuda = True
    batch_size = 60
    lr = args.lr
    n_decreases = 10
    save_dir = 'weights/LSTM/%s' % dim
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    """
    Loading train / validation datasets
    X_tr: a list of tokenized sentences
    y_tr: a list of 0 and 1
    """
    X_tr, y_tr = loadDatasetForLSTM(dim,
                                    'train')  # a list of tokenized sentences
    X_d, y_d = loadDatasetForLSTM(dim, 'dev')

    # load model and settings for training
    model = LSTMClassifier(embedding_dim=embedding_dim, hidden_dim=hidden_dim)
    if is_cuda:
        model.cuda()
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    flag = True
    old_val = np.inf  # best validation loss seen so far
    em = ExtractWordEmbeddings(emb_type='glove')
    loss_fn = nn.BCELoss()

    # train model
    epoch = 0
    cnt_decrease = 0
    while flag:
        tr_loss = 0.0
        epoch += 1
        if epoch > max_epochs or cnt_decrease > n_decreases:
            break
        # train
        model.train()
        # each epoch: shuffle X_tr and y_tr, then split them into mini-batches
        X_tr, y_tr = shuffle(X_tr, y_tr)
        tr_batches = batchify(X_tr, y_tr, batch_size)
        for X_b, y_b in tr_batches:
            # X_b is still a list of tokenized sentences (list of list of words)
            optimizer.zero_grad()
            """
            obtain_vectors_from_sentence(sent=list of words, include_unk=True)
            : changes each word into an embedding, and returns a list of embeddings
            padBatch(list of embedding lists, max_seq=None)
            : for each batch, returns a tensor fixed to the max size, applies zero padding
            """
            inputs = torch.tensor(
                padBatch([
                    em.obtain_vectors_from_sentence(sent, True) for sent in X_b
                ])).float()
            # inputs now has shape (batch_size, seq_len, embedding_dim)
            targets = torch.tensor(y_b, dtype=torch.float32)
            if is_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            # BCELoss expects outputs and targets of the same shape, with outputs already in [0, 1]
            loss = loss_fn(outputs, targets)
            loss.backward()
            tr_loss += loss.item()
            optimizer.step()

        print("[Epoch %d] train loss: %1.3f" % (epoch, tr_loss))

        # validate
        model.eval()
        current_loss = 0.0
        X_d, y_d = shuffle(X_d, y_d)
        val_batches = batchify(X_d, y_d, batch_size)
        with torch.no_grad():
            for X_b, y_b in val_batches:
                inputs = torch.tensor(
                    padBatch([
                        em.obtain_vectors_from_sentence(sent, True)
                        for sent in X_b
                    ])).float()
                targets = torch.tensor(y_b, dtype=torch.float32)
                if is_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = model(inputs)
                loss = loss_fn(outputs, targets)  # same shape requirement as in training
                current_loss += loss.item()

        print("[Epoch %d] validation loss: %1.3f" % (epoch, current_loss))
        if current_loss < old_val:
            # validation loss improved over the best so far: save a checkpoint
            best_state = model.state_dict()
            torch.save(best_state, join(save_dir, 'best-weights.pth'))
            print("Updated model")
            old_val = current_loss
            cnt_decrease = 0
        else:
            # no improvement this epoch
            cnt_decrease += 1

        if cnt_decrease >= n_decreases:
            flag = False
    return
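
A minimal command-line entry point for train() might look like the sketch below. The flag names and defaults are assumptions; the only attributes the function actually reads are args.hidden_dim, args.max_epochs, and args.lr, plus the dim string used for the dataset and save path.

# Hypothetical driver; flag names and defaults are illustrative only.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--dim', required=True, help='label dimension to train on')
    parser.add_argument('--hidden_dim', type=int, default=300)
    parser.add_argument('--max_epochs', type=int, default=50)
    parser.add_argument('--lr', type=float, default=1e-3)
    args = parser.parse_args()

    train(args.dim, args)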