Example #1
def load_model(filename):
    rnn_clf = RNNSequenceClassifier(num_classes=2,
                                    embedding_dim=300 + 1024 + 50,
                                    hidden_size=300,
                                    num_layers=1,
                                    bidir=True,
                                    dropout1=0.3,
                                    dropout2=0.2,
                                    dropout3=0.2)
    rnn_clf.load_state_dict(torch.load(filename))
    rnn_clf.cuda()
    return rnn_clf
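A minimal usage sketch of the loader above (hypothetical: the checkpoint path follows the naming used in Example #2, and example_text/example_lengths stand for batch tensors prepared as in the training loops). The model should be switched to evaluation mode so the dropout layers are disabled before inference:

# Hypothetical usage, not from the original source.
rnn_clf = load_model('../models/classification/VUA_iter_200.pt')
rnn_clf.eval()  # disable dropout for inference
with torch.no_grad():
    log_probs = rnn_clf(example_text.cuda(), example_lengths.cuda())  # (batch_size, 2)
    predicted_labels = log_probs.argmax(dim=1)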
Example #2
def train_model():
    rnn_clf = RNNSequenceClassifier(num_classes=2,
                                    embedding_dim=300 + 1024 + 50,
                                    hidden_size=300,
                                    num_layers=1,
                                    bidir=True,
                                    dropout1=0.3,
                                    dropout2=0.2,
                                    dropout3=0.2)
    # Move the model to the GPU if available
    if using_GPU:
        rnn_clf = rnn_clf.cuda()
    # Set up criterion for calculating loss
    nll_criterion = nn.NLLLoss()
    # Set up an optimizer for updating the parameters of the rnn_clf
    rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.01, momentum=0.9)
    # Number of epochs (passes through the dataset) to train the model for.
    num_epochs = 20
    '''
    3. 2
    train model
    '''
    training_loss = []
    val_loss = []
    training_f1 = []
    val_f1 = []
    # A counter for the number of gradient updates
    num_iter = 0
    for epoch in tqdm(range(num_epochs)):
        # print("Starting epoch {}".format(epoch + 1))
        for (example_text, example_lengths, labels) in train_dataloader_vua:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            labels = Variable(labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                labels = labels.cuda()
            # predicted shape: (batch_size, 2)
            predicted = rnn_clf(example_text, example_lengths)
            batch_loss = nll_criterion(predicted, labels)
            rnn_clf_optimizer.zero_grad()
            batch_loss.backward()
            rnn_clf_optimizer.step()
            num_iter += 1
            # Evaluate on the validation set every 200 gradient updates
            if num_iter % 200 == 0:
                avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
                    val_dataloader_vua, rnn_clf, nll_criterion, using_GPU)
                val_loss.append(avg_eval_loss)
                val_f1.append(f1)
                print(
                    "Iteration {}. Validation Loss {}. Accuracy {}. Precision {}. Recall {}. F1 {}. class-wise F1 {}."
                    .format(num_iter, avg_eval_loss, eval_accuracy, precision,
                            recall, f1, fus_f1))
                filename = f'../models/classification/VUA_iter_{str(num_iter)}.pt'
                torch.save(rnn_clf.state_dict(), filename)
    # print("Training done!")
    return rnn_clf, nll_criterion
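The loop above (and the cross-validation loops in Examples #5 and #6) calls an evaluate helper that is not shown. Below is a minimal sketch of its assumed contract, inferred only from the call sites: it returns average loss, accuracy, macro precision/recall/F1, and a class-wise F1. The metric computations are illustrative stand-ins, not the original implementation.

import torch
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support

def evaluate(dataloader, model, criterion, using_GPU):
    # Sketch of the assumed evaluate() contract; inferred, not original.
    model.eval()
    total_loss, all_preds, all_labels = 0.0, [], []
    with torch.no_grad():
        for text, lengths, labels in dataloader:
            if using_GPU:
                text, lengths, labels = text.cuda(), lengths.cuda(), labels.cuda()
            log_probs = model(text, lengths)  # shape: (batch_size, 2)
            total_loss += criterion(log_probs, labels).item()
            all_preds += log_probs.argmax(dim=1).cpu().tolist()
            all_labels += labels.cpu().tolist()
    model.train()
    avg_loss = total_loss / len(dataloader)
    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average='macro')
    fus_f1 = f1_score(all_labels, all_preds, average=None)  # per-class F1
    return avg_loss, accuracy, precision, recall, f1, fus_f1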
Example #3
    """
    3. Model training
    """
    '''
    3. 1 
    set up model, loss criterion, optimizer
    '''
    # Instantiate the model
    # embedding_dim = glove + elmo + suffix indicator
    # dropout1: dropout on input to RNN
    # dropout2: dropout in RNN; would be used if num_layers!=1
    # dropout3: dropout on hidden state of RNN to linear layer
    rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300+1024+50, hidden_size=300,
                                    num_layers=1, bidir=True,
                                    dropout1=0.2, dropout2=0, dropout3=0.2)
    # Move the model to the GPU if available
    if using_GPU:
        rnn_clf = rnn_clf.cuda()
    # Set up criterion for calculating loss
    nll_criterion = nn.NLLLoss()
    # Set up an optimizer for updating the parameters of the rnn_clf
    rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.02, momentum=0.9)
    # Number of epochs (passes through the dataset) to train the model for.
    num_epochs = 30

    '''
    3. 2
    train model
    '''
    training_loss = []
    val_loss = []
    training_f1 = []
    val_f1 = []
Example #4
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Path options.
    parser.add_argument("--pretrained_w2v_model_path",
                        required=True,
                        type=str,
                        help="Path of the tence w2v pretrained model.")
    parser.add_argument("--query_matrix_path",
                        required=True,
                        type=str,
                        help="Path of the query matrix.")
    parser.add_argument("--summary_result_path",
                        required=True,
                        type=str,
                        help="Path of the output model.")
    parser.add_argument("--output_result_path",
                        required=True,
                        type=str,
                        help="Path of the output result.")
    parser.add_argument("--train_path",
                        type=str,
                        required=True,
                        help="Path of the trainset.")
    parser.add_argument("--dev_path",
                        type=str,
                        required=True,
                        help="Path of the devset.")
    parser.add_argument("--test_path",
                        type=str,
                        required=True,
                        help="Path of the testset.")
    parser.add_argument("--vocab_path",
                        type=str,
                        required=True,
                        help="Path of the vocab.")
    parser.add_argument("--elmo_path",
                        type=str,
                        required=True,
                        help="Path of the elmo features.")

    # Model options.
    parser.add_argument("--language_type",
                        type=str,
                        choices=["en", "zh"],
                        required=True,
                        help="Num of the classes.")
    parser.add_argument("--num_classes",
                        type=int,
                        default=3,
                        help="Num of the classes.")
    parser.add_argument("--batch_size",
                        type=int,
                        default=64,
                        help="Batch size.")
    parser.add_argument("--require_improvement",
                        type=int,
                        default=5,
                        help="Require improvement.")
    parser.add_argument("--epochs_num",
                        type=int,
                        default=100,
                        help="Number of epochs.")
    parser.add_argument("--w2v_embedding_dim",
                        type=int,
                        required=True,
                        help="w2v embedding dim.")
    parser.add_argument("--elmo_embedding_dim",
                        type=int,
                        default=1024,
                        help="elmo embedding dim.")
    parser.add_argument("--input_dim",
                        type=int,
                        required=True,
                        help="input embedding dim.")
    parser.add_argument("--seq_length",
                        type=int,
                        default=128,
                        help="Sequence length.")
    parser.add_argument("--hidden_size",
                        type=int,
                        default=200,
                        help="hidden size.")
    parser.add_argument("--layers_num",
                        type=int,
                        default=2,
                        help="Number of layers.")
    parser.add_argument("--attention_query_size",
                        type=int,
                        default=200,
                        help="Size of attention query matrix.")
    parser.add_argument("--attention_layer",
                        choices=[
                            "att", "m_a", "m_pre_orl_a", "m_pre_orl_pun_a",
                            "m_pol_untrain_a", "mpa", "mpoa"
                        ],
                        required=True,
                        help="attention type.")
    parser.add_argument("--pretrain_model_type",
                        choices=["w2v", "elmo", "none"],
                        required=True,
                        help="pretrain model type.")

    # Optimizer options.
    parser.add_argument("--learning_rate",
                        type=float,
                        default=0.1,
                        help="Learning rate.")
    parser.add_argument("--momentum",
                        type=float,
                        default=0.9,
                        help="momentum.")
    # Training options.
    parser.add_argument("--dropout", type=float, default=0.2, help="Dropout.")
    parser.add_argument("--is_bidir",
                        type=int,
                        default=2,
                        help="bidir or only one.")
    parser.add_argument("--report_steps",
                        type=int,
                        default=100,
                        help="Specific steps to print prompt.")
    parser.add_argument("--seed", type=int, default=7, help="Random seed.")
    parser.add_argument("--run_type",
                        type=str,
                        required=True,
                        help="usage: python main_vua.py [train / test]")

    args = parser.parse_args()

    # Set seeds for numpy, random, etc.
    set_seed(args.seed)

    # Set up the vocabulary
    vocab = Vocab()
    vocab.load(args.vocab_path)
    label_columns = read_cataloge(args.dev_path)
    # Set up the embeddings
    embeddings = get_embedding_matrix(args, vocab, normalization=False)
    elmo_embedding = h5py.File(args.elmo_path, 'r')
    query_matrix = get_query_matrix(args)

    # Build the model and move it to the GPU.
    model = RNNSequenceClassifier(args, embeddings, query_matrix)
    model = model.cuda()

    best_josn = {
        'F_macro': 0,
        'P_macro': 0,
        'R_macro': 0,
        'Best_F_macro': 0,
        'ACC': 0,
        'F_negative': 0,
        'F_positive': 0,
        'Predict': [],
        'Label': [],
        'Weights': [],
        'Last_up_epoch': 0,
        'Total_batch_loss': 0,
        'F_neutral': 0,
        'Time': 0,
        'Total_orthogonal_loss': 0,
        'train_num': 0,
        'test_num': 0,
        'dev_num': 0
    }

    def evaluate(args, is_test):
        model.eval()
        if is_test:
            print("Start testing.")
            dataset = read_dataset(args, args.test_path, label_columns, vocab)
            best_josn['test_num'] = len(dataset)
            writer_result = open(os.path.join(args.output_result_path,
                                              'result.txt'),
                                 encoding='utf-8',
                                 mode='w')
            writer_summary_result = open(os.path.join(args.summary_result_path,
                                                      'summary_result.txt'),
                                         mode='a')
        else:
            dataset = read_dataset(args, args.dev_path, label_columns, vocab)
            best_josn['dev_num'] = len(dataset)
            random.shuffle(dataset)
        input_ids = torch.LongTensor([example[0] for example in dataset])
        label_ids = torch.LongTensor([example[1] for example in dataset])
        length_ids = torch.LongTensor([example[2] for example in dataset])
        input = [example[3] for example in dataset]

        if is_test:
            batch_size = 1
        else:
            batch_size = args.batch_size

        for i, (input_ids_batch, label_ids_batch,
                length_ids_batch) in enumerate(
                    batch_loader(batch_size, input_ids, label_ids,
                                 length_ids)):
            model.zero_grad()
            input_ids_batch = input_ids_batch.cuda()
            label_ids_batch = label_ids_batch.cuda()
            length_ids_batch = length_ids_batch.cuda()

            if args.attention_layer == 'att':
                predicted, weight = model(input_ids_batch, length_ids_batch,
                                          elmo_embedding)
            else:
                predicted, weight, _ = model(input_ids_batch, length_ids_batch,
                                             elmo_embedding)
            best_josn['Weights'] += weight.squeeze(
                dim=1).cpu().detach().numpy().tolist()
            _, predicted_labels = torch.max(predicted.data, 1)
            best_josn['Predict'] += predicted_labels.cpu().numpy().tolist()
            best_josn['Label'] += label_ids_batch.data.cpu().numpy().tolist()

        if is_test:
            details_result = metrics.classification_report(
                best_josn['Label'], best_josn['Predict'])
            best_josn['P_macro'], best_josn['R_macro'], best_josn[
                'F_macro'], _ = metrics.precision_recall_fscore_support(
                    best_josn['Label'], best_josn['Predict'], average="macro")
            best_josn['ACC'] = metrics.accuracy_score(
                best_josn['Label'], best_josn['Predict'])
            saveSenResult(input, best_josn['Label'], best_josn['Predict'],
                          args, best_josn['Weights'])
            writer_result.writelines(details_result)
            print(
                "Testing Acc: {:.4f}, F_macro: {:.4f}, P_macro: {:.4f}, R_macro: {:.4f}"
                .format(best_josn['ACC'], best_josn['F_macro'],
                        best_josn['P_macro'], best_josn['R_macro']))
            writer_result.writelines(
                "Testing Acc: {:.4f}, F_macro: {:.4f}, P_macro: {:.4f}, R_macro: {:.4f}"
                .format(best_josn['ACC'], best_josn['F_macro'],
                        best_josn['P_macro'], best_josn['R_macro']))
            writer_summary_result.writelines('Save path: ' + args.output_result_path +
                                             '\n')
            writer_summary_result.writelines(
                "Testing Acc: {:.4f}, F_macro: {:.4f}, P_macro: {:.4f}, R_macro: {:.4f}\n\n"
                .format(best_josn['ACC'], best_josn['F_macro'],
                        best_josn['P_macro'], best_josn['R_macro']))
            writer_summary_result.writelines(details_result)
        else:
            best_josn['P_macro'], best_josn['R_macro'], best_josn[
                'F_macro'], _ = metrics.precision_recall_fscore_support(
                    best_josn['Label'], best_josn['Predict'], average="macro")
            best_josn['ACC'] = metrics.classification.accuracy_score(
                best_josn['Label'], best_josn['Predict'])

    def train():
        print("Start training.")
        mkdir(args.output_result_path)
        writer_process = open(os.path.join(args.output_result_path,
                                           'process.txt'),
                              mode='w')
        writer_process.writelines("Start training.")
        trainset = read_dataset(args, args.train_path, label_columns, vocab)
        random.shuffle(trainset)

        best_josn['train_num'] = len(trainset)
        input_ids = torch.LongTensor([example[0] for example in trainset])
        label_ids = torch.LongTensor([example[1] for example in trainset])
        length_ids = torch.LongTensor([example[2] for example in trainset])

        print("Batch size: ", args.batch_size)
        print("The number of training instances:", best_josn['train_num'])

        start_time = time.time()
        best_josn['Time'] = get_time_dif(start_time)
        print("Time usage:", best_josn['Time'])

        param_optimizer = list(model.named_parameters())
        nll_criterion = nn.NLLLoss()
        if args.attention_layer == 'm_pol_untrain_a':
            # Exclude the (untrained) query embedding from optimization.
            optimizer_grouped_parameters = [{
                'params': [p for n, p in param_optimizer
                           if 'query_embedding.weight' not in n],
                'weight_decay': 0.01  # SGD expects 'weight_decay', not 'weight_decay_rate'
            }]
        else:
            optimizer_grouped_parameters = [{
                'params': [p for n, p in param_optimizer],
                'weight_decay': 0.01
            }]
        optimizer = optim.SGD(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              momentum=args.momentum)
        for epoch in range(1, args.epochs_num + 1):
            model.train()
            for i, (input_ids_batch, label_ids_batch,
                    length_ids_batch) in enumerate(
                        batch_loader(args.batch_size, input_ids, label_ids,
                                     length_ids)):
                model.zero_grad()
                input_ids_batch = input_ids_batch.cuda()
                label_ids_batch = label_ids_batch.cuda()
                length_ids_batch = length_ids_batch.cuda()

                if args.attention_layer == 'att':
                    predicted_ids_batch, _ = model(input_ids_batch,
                                                   length_ids_batch,
                                                   elmo_embedding)
                else:
                    predicted_ids_batch, _, orthogonal_loss = model(
                        input_ids_batch, length_ids_batch, elmo_embedding)
                    best_josn['Total_orthogonal_loss'] += orthogonal_loss
                batch_loss = nll_criterion(predicted_ids_batch,
                                           label_ids_batch)
                best_josn['Total_batch_loss'] += batch_loss
                if args.attention_layer != 'm_pre_orl_pun_a' and args.attention_layer != 'mpoa':
                    optimizer.zero_grad()
                    batch_loss.backward()
                    optimizer.step()
                else:
                    optimizer.zero_grad()
                    (0.1 * orthogonal_loss).backward(retain_graph=True)
                    (0.9 * batch_loss).backward()
                    optimizer.step()
                best_josn['Time'] = get_time_dif(start_time)
                if (i + 1) % args.report_steps == 0:
                    if args.attention_layer == 'att':
                        print(
                            "Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Time: {}"
                            .format(
                                epoch, i + 1, best_josn['Total_batch_loss'] /
                                args.report_steps, best_josn['Time']))
                        writer_process.writelines(
                            "Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Time: {}"
                            .format(
                                epoch, i + 1, best_josn['Total_batch_loss'] /
                                args.report_steps, best_josn['Time']))
                    else:
                        print(
                            "Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Avg orthogonal loss: {:.4f}, Time: {}"
                            .format(
                                epoch, i + 1, best_josn['Total_batch_loss'] /
                                args.report_steps,
                                best_josn['Total_orthogonal_loss'] /
                                args.report_steps, best_josn['Time']))
                        writer_process.writelines(
                            "Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Avg orthogonal loss: {:.4f}, Time: {}"
                            .format(
                                epoch, i + 1, best_josn['Total_batch_loss'] /
                                args.report_steps,
                                best_josn['Total_orthogonal_loss'] /
                                args.report_steps, best_josn['Time']))
                    best_josn['Total_batch_loss'] = 0
                    best_josn['Total_orthogonal_loss'] = 0
            # Evaluate on the dev set
            evaluate(args, False)
            best_josn['Time'] = get_time_dif(start_time)
            if best_josn['F_macro'] > best_josn['Best_F_macro'] + 0.001:
                best_josn['Best_F_macro'] = best_josn['F_macro']
                best_josn['Last_up_epoch'] = epoch
                torch.save(model,
                           os.path.join(args.output_result_path, 'result.pkl'))
                print("Deving Acc: {:.4f}, F_macro: {:.4f}, Time: {} *".format(
                    best_josn['ACC'], best_josn['F_macro'], best_josn['Time']))
                writer_process.writelines(
                    "Deving Acc: {:.4f}, F_macro: {:.4f}, Time: {} *".format(
                        best_josn['ACC'], best_josn['F_macro'],
                        best_josn['Time']))
            elif epoch - best_josn['Last_up_epoch'] == args.require_improvement:
                print("No optimization for a long time, auto-stopping...")
                writer_process.writelines(
                    "No optimization for a long time, auto-stopping...")
                break
            else:
                print("Deving Acc: {:.4f}, F_macro: {:.4f}, Time: {} ".format(
                    best_josn['ACC'], best_josn['F_macro'], best_josn['Time']))
                writer_process.writelines(
                    "Deving Acc: {:.4f}, F_macro: {:.4f}, Time: {} ".format(
                        best_josn['ACC'], best_josn['F_macro'],
                        best_josn['Time']))

    if args.run_type == 'train':
        train()
    else:
        model = torch.load(os.path.join(args.output_result_path, 'result.pkl'))
        evaluate(args, True)
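Both train and evaluate above iterate over a batch_loader helper that is not shown. A minimal sketch consistent with the call sites (hypothetical; it slices the three aligned tensors, built as torch.LongTensor above, into mini-batches):

def batch_loader(batch_size, input_ids, label_ids, length_ids):
    # Assumed batching helper, inferred from the call sites above.
    instances_num = input_ids.size(0)
    for i in range(0, instances_num, batch_size):
        yield (input_ids[i:i + batch_size],
               label_ids[i:i + batch_size],
               length_ids[i:i + batch_size])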
Example #5
def train_model():
    optimal_f1s = []
    optimal_ps = []
    optimal_rs = []
    optimal_accs = []
    # predictions_all = []

    for i in tqdm(range(10)):
        '''
        2. 3
        set up Dataloader for batching
        '''
        training_sentences = []
        training_labels = []
        for j in range(10):
            if j != i:
                training_sentences.extend(ten_folds[j][0])
                training_labels.extend(ten_folds[j][1])
        training_dataset_trofi = TextDataset(training_sentences, training_labels)
        val_dataset_trofi = TextDataset(ten_folds[i][0], ten_folds[i][1])

        # Data-related hyperparameters
        batch_size = 10
        # Set up a DataLoader for the training, validation, and test dataset
        train_dataloader_trofi = DataLoader(dataset=training_dataset_trofi, batch_size=batch_size, shuffle=True,
                                          collate_fn=TextDataset.collate_fn)
        val_dataloader_trofi = DataLoader(dataset=val_dataset_trofi, batch_size=batch_size, shuffle=False,
                                          collate_fn=TextDataset.collate_fn)
        """
        3. Model training
        """
        '''
        3. 1 
        set up model, loss criterion, optimizer
        '''

        # Instantiate the model
        # embedding_dim = glove + elmo + suffix indicator
        # dropout1: dropout on input to RNN
        # dropout2: dropout in RNN; would be used if num_layers != 1
        # dropout3: dropout on hidden state of RNN to linear layer
        rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300+1024+50, hidden_size=300,
                                        num_layers=1, bidir=True,
                                        dropout1=0.2, dropout2=0, dropout3=0)
        # Move the model to the GPU if available
        if using_GPU:
            rnn_clf = rnn_clf.cuda()
        # Set up criterion for calculating loss
        nll_criterion = nn.NLLLoss()
        # Set up an optimizer for updating the parameters of the rnn_clf
        rnn_clf_optimizer = optim.Adam(rnn_clf.parameters(), lr=0.001)
        # Number of epochs (passes through the dataset) to train the model for.
        num_epochs = 15

        '''
        3. 2
        train model
        '''
        training_loss = []
        val_loss = []
        training_f1 = []
        val_f1 = []
        val_p = []
        val_r = []
        val_acc = []
        # A counter for the number of gradient updates
        num_iter = 0
        train_dataloader = train_dataloader_trofi
        val_dataloader = val_dataloader_trofi
        model_index = 0
        for epoch in range(num_epochs):
            # print("Starting epoch {}".format(epoch + 1))
            for (example_text, example_lengths, labels) in train_dataloader:
                example_text = Variable(example_text)
                example_lengths = Variable(example_lengths)
                labels = Variable(labels)
                if using_GPU:
                    example_text = example_text.cuda()
                    example_lengths = example_lengths.cuda()
                    labels = labels.cuda()
                # predicted shape: (batch_size, 2)
                predicted = rnn_clf(example_text, example_lengths)
                batch_loss = nll_criterion(predicted, labels)
                rnn_clf_optimizer.zero_grad()
                batch_loss.backward()
                rnn_clf_optimizer.step()
                num_iter += 1
                # Evaluate on the validation set every 200 gradient updates
                if num_iter % 200 == 0:
                    avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(val_dataloader, rnn_clf, nll_criterion, using_GPU)
                    val_loss.append(avg_eval_loss)
                    val_f1.append(f1)
                    val_p.append(precision)
                    val_r.append(recall)
                    val_acc.append(eval_accuracy.item())
                    # print(
                    #     "Iteration {}. Validation Loss {}. Validation Accuracy {}. Validation Precision {}. Validation Recall {}. Validation F1 {}. Validation class-wise F1 {}.".format(
                    #         num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
                    # filename = f'../models/classification/TroFi_fold_{str(i)}_iter_{str(num_iter)}.pt'
                    # torch.save(rnn_clf, filename)
                    model_index += 1
    #                 avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(train_dataloader, rnn_clf, nll_criterion, using_GPU)
    #                 training_loss.append(avg_eval_loss)
    #                 training_f1.append(f1)
    #                 print(
    #                     "Iteration {}. Training Loss {}. Training Accuracy {}. Training Precision {}. Training Recall {}. Training F1 {}. Training class-wise F1 {}.".format(
    #                         num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))

        """
        additional training!
        """
    #     rnn_clf_optimizer = optim.Adam(rnn_clf.parameters(), lr=0.0005)
    #     for epoch in range(num_epochs):
    #         print("Starting epoch {}".format(epoch + 1))
    #         for (example_text, example_lengths, labels) in train_dataloader:
    #             example_text = Variable(example_text)
    #             example_lengths = Variable(example_lengths)
    #             labels = Variable(labels)
    #             if using_GPU:
    #                 example_text = example_text.cuda()
    #                 example_lengths = example_lengths.cuda()
    #                 labels = labels.cuda()
    #             # predicted shape: (batch_size, 2)
    #             predicted = rnn_clf(example_text, example_lengths)
    #             batch_loss = nll_criterion(predicted, labels)
    #             rnn_clf_optimizer.zero_grad()
    #             batch_loss.backward()
    #             rnn_clf_optimizer.step()
    #             num_iter += 1
    #             # Calculate validation and training set loss and accuracy every 200 gradient updates
    #             if num_iter % 100 == 0:
    #                 avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(val_dataloader, rnn_clf, nll_criterion, using_GPU)
    #                 val_loss.append(avg_eval_loss)
    #                 val_f1.append(f1)
    #                 val_p.append(precision)
    #                 val_r.append(recall)
    #                 val_acc.append(eval_accuracy)
    #                 print(
    #                     "Iteration {}. Validation Loss {}. Validation Accuracy {}. Validation Precision {}. Validation Recall {}. Validation F1 {}. Validation class-wise F1 {}.".format(
    #                         num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
    #                 model_index += 1

        # print("Training done for fold {}".format(i))

        """
        3.3
        plot the training process: MET F1 and losses for validation and training dataset
        """
    #     plt.figure(0)
    #     plt.title('F1 for TroFI dataset on fold ' + str(i))
    #     plt.xlabel('iteration (unit:200)')
    #     plt.ylabel('F1')
    #     plt.plot(val_f1,'g')
    #     plt.plot(val_p,'r')
    #     plt.plot(val_r,'b')
    #     plt.plot(val_acc,'c')
    #     plt.plot(training_f1, 'b')
    #     plt.legend(['Validation F1', 'Validation precision', 'Validation recall', 'Validation accuracy', 'Training F1'], loc='upper right')
    #     plt.show()


    #     plt.figure(1)
    #     plt.title('Loss for TroFi dataset on fold ' + str(i))
    #     plt.xlabel('iteration (unit:200)')
    #     plt.ylabel('Loss')
    #     plt.plot(val_loss,'g')
    #     plt.plot(training_loss, 'b')
    #     plt.legend(['Validation loss', 'Training loss'], loc='upper right')
    #     plt.show()

        """
        store the best f1
        """
        # print('val_f1: ', val_f1)
        idx = 0
        # Early F1 entries can be nan (no positive predictions yet); skip them if so.
        if math.isnan(max(val_f1)):
            optimal_f1s.append(max(val_f1[6:]))
            idx = val_f1.index(optimal_f1s[-1])
            optimal_ps.append(val_p[idx])
            optimal_rs.append(val_r[idx])
            optimal_accs.append(val_acc[idx])
        else:
            optimal_f1s.append(max(val_f1))
            idx = val_f1.index(optimal_f1s[-1])
            optimal_ps.append(val_p[idx])
            optimal_rs.append(val_r[idx])
            optimal_accs.append(val_acc[idx])
    #     filename = '../models/LSTMSuffixElmoAtt_TroFi_fold_' + str(i) + '_epoch_' + str(idx) + '.pt'
    #     temp_model = torch.load(filename)
    #     print('best model: ', filename)
    #     predictions_all.extend(test(val_dataloader_TroFi, temp_model, using_GPU))
    return np.mean(np.array(optimal_ps)), np.mean(np.array(optimal_rs)), np.mean(np.array(optimal_f1s)), np.mean(np.array(optimal_accs))
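A brief usage note (hypothetical call): train_model returns the 10-fold means of precision, recall, F1, and accuracy, in that order.

mean_p, mean_r, mean_f1, mean_acc = train_model()
print('10-fold TroFi: P={:.4f} R={:.4f} F1={:.4f} Acc={:.4f}'.format(
    mean_p, mean_r, mean_f1, mean_acc))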
Example #6
def train_model():
    optimal_f1s = []
    optimal_ps = []
    optimal_rs = []
    optimal_accs = []
    for i in tqdm(range(10)):
        '''
        2. 3
        set up Dataloader for batching
        '''
        training_sentences = []
        training_labels = []
        for j in range(10):
            if j != i:
                training_sentences.extend(ten_folds[j][0])
                training_labels.extend(ten_folds[j][1])
        training_dataset_mohX = TextDataset(training_sentences, training_labels)
        val_dataset_mohX = TextDataset(ten_folds[i][0], ten_folds[i][1])

        # Data-related hyperparameters
        batch_size = 10
        # Set up a DataLoader for the training, validation, and test dataset
        train_dataloader_mohX = DataLoader(dataset=training_dataset_mohX, batch_size=batch_size, shuffle=True,
                                          collate_fn=TextDataset.collate_fn)
        val_dataloader_mohX = DataLoader(dataset=val_dataset_mohX, batch_size=batch_size, shuffle=True,
                                          collate_fn=TextDataset.collate_fn)
        """
        3. Model training
        """
        '''
        3. 1 
        set up model, loss criterion, optimizer
        '''
        # Instantiate the model
        # embedding_dim = glove + elmo + suffix indicator
        # dropout1: dropout on input to RNN
        # dropout2: dropout in RNN; would be used if num_layers!=1
        # dropout3: dropout on hidden state of RNN to linear layer
        rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300+1024+50, hidden_size=300,
                                        num_layers=1, bidir=True,
                                        dropout1=0.2, dropout2=0, dropout3=0.2)
        # Move the model to the GPU if available
        if using_GPU:
            rnn_clf = rnn_clf.cuda()
        # Set up criterion for calculating loss
        nll_criterion = nn.NLLLoss()
        # Set up an optimizer for updating the parameters of the rnn_clf
        rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.02, momentum=0.9)
        # Number of epochs (passes through the dataset) to train the model for.
        num_epochs = 30

        '''
        3. 2
        train model
        '''
        training_loss = []
        val_loss = []
        training_f1 = []
        val_f1 = []
        val_p = []
        val_r = []
        val_acc = []

        # A counter for the number of gradient updates
        num_iter = 0
        train_dataloader = train_dataloader_mohX
        val_dataloader = val_dataloader_mohX
        for epoch in range(num_epochs):
            # print("Starting epoch {}".format(epoch + 1))
            for (example_text, example_lengths, labels) in train_dataloader:
                example_text = Variable(example_text)
                example_lengths = Variable(example_lengths)
                labels = Variable(labels)
                if using_GPU:
                    example_text = example_text.cuda()
                    example_lengths = example_lengths.cuda()
                    labels = labels.cuda()
                # predicted shape: (batch_size, 2)
                predicted = rnn_clf(example_text, example_lengths)
                batch_loss = nll_criterion(predicted, labels)
                rnn_clf_optimizer.zero_grad()
                batch_loss.backward()
                rnn_clf_optimizer.step()
                num_iter += 1
                # Calculate validation and training set loss and accuracy every 200 gradient updates
                if num_iter % 200 == 0:
                    avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(val_dataloader, rnn_clf, nll_criterion, using_GPU)
                    val_loss.append(avg_eval_loss)
                    val_f1.append(f1)
                    val_p.append(precision)
                    val_r.append(recall)
                    val_acc.append(eval_accuracy.item())
                    # print(
                    #     "Iteration {}. Validation Loss {}. Validation Accuracy {}. Validation Precision {}. Validation Recall {}. Validation F1 {}. Validation class-wise F1 {}.".format(
                    #         num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
                    # filename = f'../models/classification/MOHX_fold_{str(i)}_iter_{str(num_iter)}.pt'
                    # torch.save(rnn_clf, filename)
                    avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(train_dataloader, rnn_clf, nll_criterion, using_GPU)
                    training_loss.append(avg_eval_loss)
                    training_f1.append(f1)
                    # print(
                    #     "Iteration {}. Training Loss {}. Training Accuracy {}. Training Precision {}. Training Recall {}. Training F1 {}. Training class-wise F1 {}.".format(
                    #         num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
        # print("Training done for fold {}".format(i))

        # store the best f1
        idx = 0
        # Early F1 entries can be nan (no positive predictions yet); skip them if so.
        if math.isnan(max(val_f1)):
            optimal_f1s.append(max(val_f1[6:]))
            idx = val_f1.index(optimal_f1s[-1])
            optimal_ps.append(val_p[idx])
            optimal_rs.append(val_r[idx])
            optimal_accs.append(val_acc[idx])
        else:
            optimal_f1s.append(max(val_f1))
            idx = val_f1.index(optimal_f1s[-1])
            optimal_ps.append(val_p[idx])
            optimal_rs.append(val_r[idx])
            optimal_accs.append(val_acc[idx])
    return np.mean(np.array(optimal_ps)), np.mean(np.array(optimal_rs)), np.mean(np.array(optimal_f1s)), np.mean(np.array(optimal_accs))
    # print('F1 on MOH-X by 10-fold = ', optimal_f1s)
    # print('F1 on MOH-X = ', np.mean(np.array(optimal_f1s)))

    """