Example #1
def load_model(filename, using_GPU):
    RNNseq_model = RNNSequenceModel(num_classes=2,
                                    embedding_dim=300 + 1024,
                                    hidden_size=300,
                                    num_layers=1,
                                    bidir=True,
                                    dropout1=0.5,
                                    dropout2=0,
                                    dropout3=0.1)
    # map_location lets a GPU-trained checkpoint load on a CPU-only machine
    RNNseq_model.load_state_dict(
        torch.load(filename, map_location=None if using_GPU else 'cpu'))
    if using_GPU:
        RNNseq_model.cuda()
    return RNNseq_model
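These snippets all rely on imports from the surrounding project. A minimal sketch of loading and querying the model with this helper (the checkpoint path here is hypothetical):

import torch

# Hypothetical checkpoint path; load_model is the helper defined above
model = load_model("checkpoints/rnnseq_vua.pt",
                   using_GPU=torch.cuda.is_available())
model.eval()  # disable the dropout layers before running inference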
Example #2
def load_model(filename, embedding_dim):
    model = RNNSequenceModel(num_classes=2,
                             embedding_dim=embedding_dim,
                             hidden_size=hidden_size,
                             num_layers=1,
                             bidir=True,
                             dropout1=dropouts[0],
                             dropout2=dropouts[1],
                             dropout3=dropouts[2])
    model.load_state_dict(torch.load(filename))
    if using_GPU:
        model.cuda()
    return model
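This variant reads hidden_size, dropouts, and using_GPU from the enclosing module. A minimal sketch of that assumed configuration, with illustrative values taken from the other examples:

import torch

# Assumed module-level configuration (values are illustrative only)
hidden_size = 300
dropouts = [0.5, 0.0, 0.1]   # dropout1, dropout2, dropout3
using_GPU = torch.cuda.is_available()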
Example #3
"""
3. Model training
"""
'''
3. 1 
set up model, loss criterion, optimizer
'''
# Instantiate the model
# embedding_dim = glove + elmo + suffix indicator
# dropout1: dropout on input to RNN
# dropout2: dropout in RNN; would be used if num_layers!=1
# dropout3: dropout on hidden state of RNN to linear layer
RNNseq_model = RNNSequenceModel(num_classes=2,
                                embedding_dim=300 + 1024,
                                hidden_size=300,
                                num_layers=1,
                                bidir=True,
                                dropout1=0.5,
                                dropout2=0,
                                dropout3=0.1)
# Move the model to the GPU if available
if using_GPU:
    RNNseq_model = RNNseq_model.cuda()
# Set up criterion for calculating loss
loss_criterion = nn.NLLLoss()
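# Note: nn.NLLLoss expects log-probabilities, so RNNSequenceModel presumably
# ends in a log_softmax layer (inferred from the loss choice, not shown here)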
# Set up an optimizer for updating the parameters of RNNseq_model
rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.005)
# Number of epochs (passes through the dataset) to train the model for.
num_epochs = 10
'''
3. 2
train model
'''
Example #4
def train_model():
    """
    3. Model training
    """
    '''
    3. 1 
    set up model, loss criterion, optimizer
    '''
    # Instantiate the model
    # embedding_dim = glove + elmo + suffix indicator
    # dropout1: dropout on input to RNN
    # dropout2: dropout in RNN; would be used if num_layers!=1
    # dropout3: dropout on hidden state of RNN to linear layer
    RNNseq_model = RNNSequenceModel(num_classes=2,
                                    embedding_dim=300 + 1024,
                                    hidden_size=300,
                                    num_layers=1,
                                    bidir=True,
                                    dropout1=0.5,
                                    dropout2=0,
                                    dropout3=0.1)
    # Move the model to the GPU if available
    if using_GPU:
        RNNseq_model = RNNseq_model.cuda()
    # Set up criterion for calculating loss
    loss_criterion = nn.NLLLoss()
    # Set up an optimizer for updating the parameters of RNNseq_model
    rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.005)
    # Number of epochs (passes through the dataset) to train the model for.
    num_epochs = 10
    '''
    3. 2
    train model
    '''
    train_loss = []
    val_loss = []
    performance_matrix = None
    val_f1s = []
    train_f1s = []
    # A counter for the number of gradient updates
    num_iter = 0
    comparable = []
    for epoch in tqdm(range(num_epochs)):
        # print("Starting epoch {}".format(epoch + 1))
        for (__, example_text, example_lengths,
             labels) in train_dataloader_vua:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            labels = Variable(labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                labels = labels.cuda()
            # predicted shape: (batch_size, seq_len, 2)
            predicted = RNNseq_model(example_text, example_lengths)
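            # Flatten predictions to (batch*seq_len, 2) and labels to
            # (batch*seq_len,) so the loss is computed per token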
            batch_loss = loss_criterion(predicted.view(-1, 2), labels.view(-1))
            rnn_optimizer.zero_grad()
            batch_loss.backward()
            rnn_optimizer.step()
            num_iter += 1
            # Evaluate on the validation set every 200 gradient updates
            if num_iter % 200 == 0:
                avg_eval_loss, performance_matrix = evaluate(
                    idx2pos, val_dataloader_vua, RNNseq_model, loss_criterion,
                    using_GPU)
                val_loss.append(avg_eval_loss)
                val_f1s.append(performance_matrix[:, 2])
                # print("Iteration {}. Validation Loss {}.".format(num_iter, avg_eval_loss))
    #             avg_eval_loss, performance_matrix = evaluate(idx2pos, train_dataloader_vua, RNNseq_model,
    #                                                          loss_criterion, using_GPU)
    #             train_loss.append(avg_eval_loss)
    #             train_f1s.append(performance_matrix[:, 2])
    #             print("Iteration {}. Training Loss {}.".format(num_iter, avg_eval_loss))
    """
    for additional training
    """
    rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.0001)
    for epoch in range(10):
        # print("Starting epoch {}".format(epoch + 1))
        for (__, example_text, example_lengths,
             labels) in train_dataloader_vua:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            labels = Variable(labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                labels = labels.cuda()
            # predicted shape: (batch_size, seq_len, 2)
            predicted = RNNseq_model(example_text, example_lengths)
            batch_loss = loss_criterion(predicted.view(-1, 2), labels.view(-1))
            rnn_optimizer.zero_grad()
            batch_loss.backward()
            rnn_optimizer.step()
            num_iter += 1
            # Evaluate on the validation set every 200 gradient updates
            if num_iter % 200 == 0:
                avg_eval_loss, performance_matrix = evaluate(
                    idx2pos, val_dataloader_vua, RNNseq_model, loss_criterion,
                    using_GPU)
                val_loss.append(avg_eval_loss)
                val_f1s.append(performance_matrix[:, 2])
                # print("Iteration {}. Validation Loss {}.".format(num_iter, avg_eval_loss))

    #             avg_eval_loss, performance_matrix = evaluate(idx2pos, train_dataloader_vua, RNNseq_model,
    #                                                          loss_criterion, using_GPU)
    #             train_loss.append(avg_eval_loss)
    #             train_f1s.append(performance_matrix[:, 2])
    #             print("Iteration {}. Training Loss {}.".format(num_iter, avg_eval_loss))
    #             comparable.append(get_performance())

    # print("Training done!")
    return RNNseq_model, loss_criterion
Example #5
def train_model(train_dataloader,
                val_dataloader,
                embedding_dim,
                num_epochs=20,
                print_every=200,
                prefix=dataset,
                verbose=True):
    model = RNNSequenceModel(num_classes=2,
                             embedding_dim=embedding_dim,
                             hidden_size=hidden_size,
                             num_layers=1,
                             bidir=True,
                             dropout1=dropouts[0],
                             dropout2=dropouts[1],
                             dropout3=dropouts[2])
    model = model.cuda() if using_GPU else model

    loss_weight = None
    if class_weight:
        loss_weight = torch.Tensor(class_weight)
        loss_weight = loss_weight.cuda() if using_GPU else loss_weight

    loss_criterion = nn.NLLLoss(weight=loss_weight,
                                reduction='sum' if class_weight else 'mean')
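    # With class weights, reduction='sum' avoids the weighted-mean
    # normalization that 'mean' applies (interpretation; not documented here)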
    rnn_optimizer = optim.Adam(model.parameters(), lr=0.005)

    optimal_score = [0, 0, 0, 0, 0]  # Iteration, Acc, Precision, Recall, F1
    optimal_state_dict = None
    num_iter = 0  # Number of gradient updates
    # Track training performance; the matrix is reset every print_every updates
    train_confusion_matrix = np.zeros((2, 2))
    for epoch in range(num_epochs):
        # Halfway through training, switch to a slower learning rate
        if epoch == num_epochs // 2:  # integer division so odd epoch counts still match
            rnn_optimizer = optim.Adam(model.parameters(), lr=0.001)

        for (example_text, example_lengths, example_labels,
             example_features) in train_dataloader:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            example_labels = Variable(example_labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                example_labels = example_labels.cuda()
            # predicted shape: (batch_size, seq_len, 2)
            predicted = model(example_text, example_lengths)
            batch_loss = loss_criterion(predicted.view(-1, 2),
                                        example_labels.view(-1))
            rnn_optimizer.zero_grad()
            batch_loss.backward()
            rnn_optimizer.step()
            num_iter += 1
            # Get predictions, update confusion matrix
            _, predicted_labels = torch.max(predicted.data, 2)
            train_confusion_matrix = eval_util.update_confusion_matrix(
                train_confusion_matrix, predicted_labels, example_labels.data)
            # Report training stats and run validation every print_every updates
            if num_iter % print_every == 0:
                if verbose:
                    train_performance = eval_util.print_info(
                        train_confusion_matrix)
                    train_confusion_matrix = np.zeros((2, 2))
                    print(f"Iteration {num_iter}")
                    print(
                        f"Trn Performance: {train_performance}, Loss {batch_loss.item()}"
                    )

                if val_dataloader is not None:
                    avg_eval_loss, performance = eval_util.evaluate(
                        val_dataloader, model, loss_criterion, using_GPU)
                    if performance[-1] > optimal_score[-1]:
                        optimal_score = performance
                        optimal_state_dict = model.state_dict()
                    if verbose:
                        print(
                            f"Val Performance: {performance}, Loss {avg_eval_loss}"
                        )
                filename = f"models/{prefix}_iter_{num_iter}.pt"
                torch.save(model.state_dict(), filename)
    # Restore the best validation checkpoint, if validation was performed
    if optimal_state_dict is not None:
        model.load_state_dict(optimal_state_dict)
    return model, optimal_score
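A hypothetical call of this trainer, assuming the dataloaders and the module-level dataset, hidden_size, dropouts, class_weight, and using_GPU are already defined:

best_model, best_score = train_model(train_dataloader,
                                     val_dataloader,
                                     embedding_dim=300 + 1024,
                                     num_epochs=20)
print("Best [iter, acc, p, r, f1]:", best_score)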
Example #6
"""
3. Model training
"""
'''
3. 1 
set up model, loss criterion, optimizer
'''
# Instantiate the model
# embedding_dim = glove + elmo + pos indicator
# dropout1: dropout on input to RNN
# dropout2: dropout in RNN; would be used if num_layers!=1
# dropout3: dropout on hidden state of RNN to linear layer
RNNseq_model = RNNSequenceModel(num_classes=2,
                                embedding_dim=300 + 1024,
                                hidden_size=300,
                                num_layers=1,
                                bidir=True,
                                dropout1=0.2,
                                dropout2=0.2,
                                dropout3=0.2)
# Move the model to the GPU if available
if using_GPU:
    RNNseq_model = RNNseq_model.cuda()
# Set up criterion for calculating loss
loss_criterion = nn.NLLLoss()
# Set up an optimizer for updating the parameters of RNNseq_model
rnn_optimizer = optim.SGD(RNNseq_model.parameters(), lr=0.08, momentum=0.9)
# Number of epochs (passes through the dataset) to train the model for.
num_epochs = 15
'''
3. 2
train model
'''
Example #7
"""
3. Model training
"""
'''
3. 1 
set up model, loss criterion, optimizer
'''
# Instantiate the model
# embedding_dim = glove + elmo + suffix indicator
# dropout1: dropout on input to RNN
# dropout2: dropout in RNN; would be used if num_layers!=1
# dropout3: dropout on hidden state of RNN to linear layer
RNNseq_model = RNNSequenceModel(num_classes=2,
                                embedding_dim=300 + 1024 + 250 + 30,
                                hidden_size=300,
                                num_layers=1,
                                bidir=True,
                                char_vocab_size=len(c2idx),
                                char_embed_dim=50,
                                dropout1=0.5,
                                dropout2=0,
                                dropout3=0.1)

Transformer_model = Transformer(emb=300 + 1024 + 250 + 30,
                                k=300,
                                heads=1,
                                depth=1,
                                num_classes=2,
                                char_vocab_size=len(c2idx),
                                char_embed_dim=50)

transformer_parameters = sum(p.numel() for p in Transformer_model.parameters()
                             if p.requires_grad)
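For a like-for-like parameter budget, the same counting idiom can be applied to the RNN model (a sketch; this comparison is not in the original snippet):

rnnseq_parameters = sum(p.numel() for p in RNNseq_model.parameters()
                        if p.requires_grad)
print(f"Transformer params: {transformer_parameters:,}  "
      f"RNN params: {rnnseq_parameters:,}")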
Example #8
def train_model(train_dataloader, val_dataloader, fold_num, idx2pos, using_GPU):
    optimal_f1s = []
    optimal_ps = []
    optimal_rs = []
    optimal_accs = []
    predictions_all = []

    RNNseq_model = RNNSequenceModel(num_classes=2,
                                    embedding_dim=300 + 1024,
                                    hidden_size=300,
                                    num_layers=1,
                                    bidir=True,
                                    dropout1=0.5,
                                    dropout2=0,
                                    dropout3=0.1)
    # Move the model to the GPU if available
    if using_GPU:
        RNNseq_model = RNNseq_model.cuda()
    # Set up criterion for calculating loss
    loss_criterion = nn.NLLLoss()
    # Set up an optimizer for updating the parameters of RNNseq_model
    rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.005)
    # Number of epochs (passes through the dataset) to train the model for.
    num_epochs = 20

    '''
    3. 2
    train model
    '''
    train_loss = []
    val_loss = []
    performance_matrix = None
    val_f1s = []
    train_f1s = []
    # A counter for the number of gradient updates
    num_iter = 0
    comparable = []
    for epoch in range(num_epochs):
        # print("Starting epoch {}".format(epoch + 1))
        for (_, example_text, example_lengths, example_labels) in train_dataloader:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            example_labels = Variable(example_labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                example_labels = example_labels.cuda()
            # predicted shape: (batch_size, seq_len, 2)
            predicted = RNNseq_model(example_text, example_lengths)
            #
            # _, predicted_labels = torch.max(predicted.data, 2)
            # print("# pred M:", torch.sum(predicted_labels), "# actual M:", torch.sum(example_labels))
            #
            batch_loss = loss_criterion(predicted.view(-1, 2), example_labels.view(-1))
            rnn_optimizer.zero_grad()
            batch_loss.backward()
            rnn_optimizer.step()
            num_iter += 1
            # Evaluate on the validation set every 100 gradient updates
            if num_iter % 100 == 0:
                avg_eval_loss, performance_matrix = evaluate(idx2pos, val_dataloader, RNNseq_model, loss_criterion, using_GPU)
                val_loss.append(avg_eval_loss)
                val_f1s.append(performance_matrix[:, 2])
                print("Iteration {}. Validation Loss {}. {}".format(num_iter, avg_eval_loss, performance_matrix))
                filename = f"../models/sequence/TOEFL_fold_{fold_num}_iter_{num_iter}.pt"
                torch.save(RNNseq_model.state_dict(), filename)
                # avg_eval_loss, performance_matrix = evaluate(idx2pos, train_dataloader_vua, RNNseq_model,
                #                                              loss_criterion, using_GPU)
    #             train_loss.append(avg_eval_loss)
    #             train_f1s.append(performance_matrix[:, 2])
    #             print("Iteration {}. Training Loss {}.".format(num_iter, avg_eval_loss))

    return RNNseq_model
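A saved fold checkpoint could later be restored with the load_model helper from Example #1, since the hyperparameters match (the fold and iteration numbers here are illustrative):

RNNseq_model = load_model("../models/sequence/TOEFL_fold_0_iter_100.pt",
                          using_GPU)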
Example #9
def train_model():
    '''
    2. 3 10-fold cross validation
    '''
    # Split embedded_trofi into separate lists of sentences, POS tags, and labels to pass into TextDataset
    sentences = [example[0] for example in embedded_trofi]
    poss = [example[1] for example in embedded_trofi]
    labels = [example[2] for example in embedded_trofi]
    # ten_folds is a list of 10 tuples: (embedded_sentences, pos_tags, labels)
    ten_folds = []
    fold_size = len(sentences) // 10  # TroFi has 3,737 examples, so each fold holds 373
    for i in range(10):
        ten_folds.append((sentences[i * fold_size:(i + 1) * fold_size],
                          poss[i * fold_size:(i + 1) * fold_size],
                          labels[i * fold_size:(i + 1) * fold_size]))

    idx2pos = {0: 'words that are not focus verbs', 1: 'focus verb'}

    optimal_f1s = []
    optimal_ps = []
    optimal_rs = []
    optimal_accs = []
    predictions_all = []
    for i in tqdm(range(10)):
        '''
        2. 3
        set up Dataloader for batching
        '''
        training_sentences = []
        training_labels = []
        training_poss = []
        for j in range(10):
            if j != i:
                training_sentences.extend(ten_folds[j][0])
                training_poss.extend(ten_folds[j][1])
                training_labels.extend(ten_folds[j][2])
        training_dataset_trofi = TextDataset(training_sentences, training_poss,
                                             training_labels)
        val_dataset_trofi = TextDataset(ten_folds[i][0], ten_folds[i][1],
                                        ten_folds[i][2])

        # Data-related hyperparameters
        batch_size = 10
        # Set up a DataLoader for the training, validation, and test dataset
        train_dataloader_trofi = DataLoader(dataset=training_dataset_trofi,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            collate_fn=TextDataset.collate_fn)
        val_dataloader_trofi = DataLoader(dataset=val_dataset_trofi,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          collate_fn=TextDataset.collate_fn)
        """
        3. Model training
        """
        '''
        3. 1 
        set up model, loss criterion, optimizer
        '''
        # Instantiate the model
        # embedding_dim = glove + elmo + suffix indicator
        # dropout1: dropout on input to RNN
        # dropout2: dropout in RNN; would be used if num_layers!=1
        # dropout3: dropout on hidden state of RNN to linear layer
        RNNseq_model = RNNSequenceModel(num_classes=2,
                                        embedding_dim=300 + 1024,
                                        hidden_size=300,
                                        num_layers=1,
                                        bidir=True,
                                        dropout1=0.5,
                                        dropout2=0,
                                        dropout3=0.2)
        # Move the model to the GPU if available
        if using_GPU:
            RNNseq_model = RNNseq_model.cuda()
        # Set up criterion for calculating loss
        loss_criterion = nn.NLLLoss()
        # Set up an optimizer for updating the parameters of RNNseq_model
        rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.001)
        # Number of epochs (passes through the dataset) to train the model for.
        num_epochs = 10
        '''
        3. 2
        train model
        '''
        train_loss = []
        val_loss = []
        performance_matrix = None
        val_f1 = []
        val_p = []
        val_r = []
        val_acc = []
        train_f1 = []
        # A counter for the number of gradient updates
        num_iter = 0
        model_index = 0
        comparable = []
        for epoch in range(num_epochs):
            # print("Starting epoch {}".format(epoch + 1))
            for (__, example_text, example_lengths,
                 labels) in train_dataloader_trofi:
                example_text = Variable(example_text)
                example_lengths = Variable(example_lengths)
                labels = Variable(labels)
                if using_GPU:
                    example_text = example_text.cuda()
                    example_lengths = example_lengths.cuda()
                    labels = labels.cuda()
                # predicted shape: (batch_size, seq_len, 2)
                predicted = RNNseq_model(example_text, example_lengths)
                batch_loss = loss_criterion(predicted.view(-1, 2),
                                            labels.view(-1))
                rnn_optimizer.zero_grad()
                batch_loss.backward()
                rnn_optimizer.step()
                num_iter += 1
                # Evaluate on the validation set every 200 gradient updates
                if num_iter % 200 == 0:
                    avg_eval_loss, performance_matrix = evaluate(
                        idx2pos, val_dataloader_trofi, RNNseq_model,
                        loss_criterion, using_GPU)
                    val_loss.append(avg_eval_loss)
                    val_p.append(performance_matrix[1][0])
                    val_r.append(performance_matrix[1][1])
                    val_f1.append(performance_matrix[1][2])
                    val_acc.append(performance_matrix[1][3])
                    # print("Iteration {}. Validation Loss {}.".format(num_iter, avg_eval_loss))
    #                 avg_eval_loss, performance_matrix = evaluate(idx2pos, train_dataloader_trofi, RNNseq_model,
    #                                                              loss_criterion, using_GPU)
    #                 train_loss.append(avg_eval_loss)
    #                 train_f1.append(performance_matrix[1][1])
    #                 print("Iteration {}. Training Loss {}.".format(num_iter, avg_eval_loss))
    #     print("Training done for fold {}".format(i))
        """
        3.3
        plot the training process: MET F1 and losses for validation and training dataset
        """
        #     plt.figure(0)
        #     plt.title('F1 for TroFI dataset on fold ' + str(i))
        #     plt.xlabel('iteration (unit:200)')
        #     plt.ylabel('F1')
        #     plt.plot(val_f1, 'g')
        #     #     plt.plot(train_f1, 'b')
        #     plt.legend(['Validation F1', 'Training F1'], loc='upper right')
        #     plt.show()

        #     plt.figure(1)
        #     plt.title('Loss for TroFi dataset on fold ' + str(i))
        #     plt.xlabel('iteration (unit:200)')
        #     plt.ylabel('Loss')
        #     plt.plot(val_loss, 'g')
        #     #     plt.plot(train_loss, 'b')
        #     plt.legend(['Validation loss', 'Training loss'], loc='upper right')
        #     plt.show()
        """
        store the best f1
        """
        # print('val_f1: ', val_f1)
        # NaN F1 typically comes from early evaluations where the model
        # predicts no positives; in that case skip the first few scores
        if math.isnan(max(val_f1)):
            best_f1 = max(val_f1[6:])
        else:
            best_f1 = max(val_f1)
        optimal_f1s.append(best_f1)
        idx = val_f1.index(best_f1)
        optimal_ps.append(val_p[idx])
        optimal_rs.append(val_r[idx])
        optimal_accs.append(val_acc[idx])
    """
    print out the performance
    plot the performance on each fold
    """
    # print('F1 on TroFi by 10-fold = ', optimal_f1s)
    # print('Precision on TroFi = ', np.mean(np.array(optimal_ps)))
    # print('Recall on TroFi = ', np.mean(np.array(optimal_rs)))
    # print('F1 on TroFi = ', np.mean(np.array(optimal_f1s)))
    # print('Accuracy on TroFi = ', np.mean(np.array(optimal_accs)))
    return optimal_f1s, np.mean(np.array(optimal_ps)), np.mean(
        np.array(optimal_rs)), np.mean(np.array(optimal_f1s)), np.mean(
            np.array(optimal_accs))
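A hypothetical way to run the cross-validation and summarize the returned averages (names follow the return order above):

fold_f1s, mean_p, mean_r, mean_f1, mean_acc = train_model()
print("Per-fold F1:", fold_f1s)
print(f"TroFi averages: P={mean_p:.3f}  R={mean_r:.3f}  "
      f"F1={mean_f1:.3f}  Acc={mean_acc:.3f}")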