コード例 #1
0
ファイル: train.py プロジェクト: 513415184/nlp
def main(train_file,
         dev_file,
         vocab_file,
         target_dir,
         max_length=50,
         hidden_size=300,
         dropout=0.2,
         num_classes=2,
         epochs=1,
         batch_size=256,
         lr=0.0005,
         patience=5,
         max_grad_norm=10.0,
         gpu_index=0,
         checkpoint=None):
    #device = torch.device("cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    device = torch.device("cpu")
    print(20 * "=", " Preparing for training ", 20 * "=")
    # 保存模型的路径
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # -------------------- Data loading ------------------- #
    print("\t* Loading training data...")
    train_data = LCQMC_Dataset(train_file, vocab_file, max_length)
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading validation data...")
    dev_data = LCQMC_Dataset(dev_file, vocab_file, max_length)
    dev_loader = DataLoader(dev_data, shuffle=True, batch_size=batch_size)
    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    # embeddings = load_embeddings(embeddings_file)
    model = ESIM(hidden_size,
                 dropout=dropout,
                 num_labels=num_classes,
                 device=device).to(device)
    # -------------------- Preparation for training  ------------------- #
    print('a')
    criterion = nn.CrossEntropyLoss()
    # 过滤出需要梯度更新的参数
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    print('b')
    # optimizer = optim.Adadelta(parameters, params["LEARNING_RATE"])
    optimizer = torch.optim.Adam(parameters, lr=lr)
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.85,
                                                           patience=0)

    best_score = 0.0
    start_epoch = 1
    # Data for loss curves plot
    epochs_count = []
    train_losses = []
    valid_losses = []
    # Continuing training from a checkpoint if one was given as argument
    if checkpoint:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]
        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]
    # Compute loss and accuracy before starting (or resuming) training.
    _, valid_loss, valid_accuracy, auc = validate(model, dev_loader, criterion)
    print(
        "\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%, auc: {:.4f}"
        .format(valid_loss, (valid_accuracy * 100), auc))
    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training ESIM model on device: {}".format(device),
          20 * "=")
    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)
        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader,
                                                       optimizer, criterion,
                                                       epoch, max_grad_norm)
        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy, epoch_auc = validate(
            model, dev_loader, criterion)
        valid_losses.append(epoch_loss)
        print(
            "-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%, auc: {:.4f}\n"
            .format(epoch_time, epoch_loss, (epoch_accuracy * 100), epoch_auc))
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)
        # Early stopping on validation accuracy.

        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            best_score = epoch_accuracy
            patience_counter = 0
            torch.save(
                {
                    "epoch": epoch,
                    "model": model.state_dict(),
                    "best_score": best_score,
                    "epochs_count": epochs_count,
                    "train_losses": train_losses,
                    "valid_losses": valid_losses
                }, os.path.join(target_dir, "best.pth.tar"))
            # Save the model at each epoch.
        torch.save(
            {
                "epoch": epoch,
                "model": model.state_dict(),
                "best_score": best_score,
                "optimizer": optimizer.state_dict(),
                "epochs_count": epochs_count,
                "train_losses": train_losses,
                "valid_losses": valid_losses
            }, os.path.join(target_dir, "esim_{}.pth.tar".format(epoch)))

        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break
コード例 #2
0
        y = y.to(device)
        y_hat = net(sent1.to(device), sent2.to(device), mask1.to(device),
                    mask2.to(device))
        l = crition(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l
        batch_num += 1
        n += y.shape[0]
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
        if batch_num % 500 == 0:
            val_acc, val_loss = val_test(net, dev_iter, crition)
            if val_acc >= best_acc:
                best_acc = val_acc
                torch.save(net.state_dict(), args.model_path)
            print(
                'batch: %d, train_loss: %.3f train_acc: %.4f val_loss: %.3f val_acc: %.4f time: %.3F'
                % (batch_num, train_l_sum / batch_num, train_acc_sum / n,
                   val_loss, val_acc, time.time() - start))

# # 加载测试数据
# def load_test_data(args):
#     text = data.Field(sequential= True, use_vocab = True, lower = True, fix_length = args.max_len, include_lengths = True,batch_first = True)
#     fields = [('sentence1',text),('sentence2',text)]
#     # 读取数据
#     test = []
#     with open('./data/snli.test','r') as f:
#         for line in f.readlines():
#             sents = line.strip().split('|||')
#             test.append(data.Example.fromlist([sents[0],sents[1]],fields))
コード例 #3
0
def main():
    device = args.device
    print(20 * "=", " Preparing for training ", 20 * "=")
    # 保存模型的路径
    if not os.path.exists(args.target_dir):
        os.makedirs(args.target_dir)
    # -------------------- Data loading ------------------- #
    print("Loading data......")
    train_loader, dev_loader, test_loader, SEN1, SEN2 = load_data(
        args.batch_size, device)
    embedding = SEN1.vectors
    vocab_size = len(embedding)
    print("vocab_size:", vocab_size)
    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    model = ESIM(args.hidden_size,
                 embedding=embedding,
                 dropout=args.dropout,
                 num_labels=args.num_classes,
                 device=device).to(device)
    # -------------------- Preparation for training  ------------------- #
    criterion = nn.CrossEntropyLoss()
    # 过滤出需要梯度更新的参数
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=args.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="min",
                                                           factor=0.1,
                                                           patience=10)

    best_score = 0.0
    if args.ckp:
        checkpoint = torch.load(os.path.join(args.target_dir, args.ckp))
        best_score = checkpoint["best_score"]
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        _, valid_loss, valid_accuracy = validate(model, dev_loader, criterion)
        print("\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%".
              format(valid_loss, (valid_accuracy * 100)))

    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training ESIM model on device: {}".format(device),
          20 * "=")
    patience_counter = 0
    for epoch in range(args.num_epoch):
        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader,
                                                       optimizer, criterion,
                                                       args.max_grad_norm,
                                                       device)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = validate(
            model, dev_loader, criterion, device)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)

        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            print("save model!!!!")
            best_score = epoch_accuracy
            patience_counter = 0
            torch.save(
                {
                    "model": model.state_dict(),
                    "best_score": best_score,
                    "optimizer": optimizer.state_dict(),
                }, os.path.join(args.target_dir, "best.pth.tar"))

        if patience_counter >= 5:
            print("-> Early stopping: patience limit reached, stopping...")
            break

    # ##-------------------- Testing epochs ------------------- #
    # print(20 * "=", " Testing ", 20 * "=")
    # if args.ckp:
    #     checkpoint = torch.load(os.path.join(args.target_dir, args.ckp))
    #     best_score = checkpoint["best_score"]
    #     model.load_state_dict(checkpoint["model"])
    #     optimizer.load_state_dict(checkpoint["optimizer"])
    #
    # print("best_score:", best_score)
    # all_labels = test(model, test_loader, device)
    # print(all_labels[:10])
    # target_label = [id2label[id] for id in all_labels]
    # print(target_label[:10])
    # with open(os.path.join(args.target_dir, 'result.txt'), 'w+') as f:
    #     for label in target_label:
    #         f.write(label + '\n')

    del train_loader
    del dev_loader
    del test_loader
    del SEN1
    del SEN2
    del embedding
コード例 #4
0
                (epoch + 1, epoch_mins, epoch_secs))
    f_log.write('\tTrain Loss: %.3f | Train Acc: %.2f %%\n' %
                (train_loss, train_acc * 100))
    f_log.write('\t Val. Loss: %.3f |  Val. Acc: %.2f %%\n' %
                (valid_loss, valid_acc * 100))
    print('Epoch: %d | Epoch Time: %dm %ds' %
          (epoch + 1, epoch_mins, epoch_secs))
    print('\tTrain Loss: %.3f | Train Acc: %.2f %%' %
          (train_loss, train_acc * 100))
    print('\t Val. Loss: %.3f |  Val. Acc: %.2f %%' %
          (valid_loss, valid_acc * 100))

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        best_valid_acc = valid_acc
        torch.save(model.state_dict(), './saved_model/esim.pt')
        print("New model saved!")
        f_log.write("New model saved!\n")

    f_log.flush()
    f_log.close()

model.load_state_dict(torch.load('./saved_model/esim.pt'))
model.eval()

f_valid = open("data/test-set.data", "r", encoding='utf-8')
f_res = open('prediction.txt', 'w')
for i, rowlist in enumerate(f_valid):
    rowlist = rowlist[:-1].split('\t')
    input_sent = []
    for sent in rowlist[:2]:
コード例 #5
0
def model_train_validate_test(train_df,
                              dev_df,
                              test_df,
                              embeddings_file,
                              vocab_file,
                              target_dir,
                              mode,
                              num_labels=2,
                              max_length=50,
                              hidden_size=200,
                              dropout=0.2,
                              epochs=50,
                              batch_size=256,
                              lr=0.0005,
                              patience=5,
                              max_grad_norm=10.0,
                              gpu_index=0,
                              if_save_model=False,
                              checkpoint=None):
    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for training ", 20 * "=")
    # 保存模型的路径
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # -------------------- Data loading ------------------- #
    print("\t* Loading training data...")
    train_data = My_Dataset(train_df, vocab_file, max_length, mode)
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading validation data...")
    dev_data = My_Dataset(dev_df, vocab_file, max_length, mode)
    dev_loader = DataLoader(dev_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading test data...")
    test_data = My_Dataset(test_df, vocab_file, max_length, mode)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    if (embeddings_file is not None):
        embeddings = load_embeddings(embeddings_file)
    else:
        embeddings = None
    model = ESIM(hidden_size,
                 embeddings=embeddings,
                 dropout=dropout,
                 num_labels=num_labels,
                 device=device).to(device)
    total_params = sum(p.numel() for p in model.parameters())
    print(f'{total_params:,} total parameters.')
    total_trainable_params = sum(p.numel() for p in model.parameters()
                                 if p.requires_grad)
    print(f'{total_trainable_params:,} training parameters.')
    # -------------------- Preparation for training  ------------------- #
    criterion = nn.CrossEntropyLoss()
    # 过滤出需要梯度更新的参数
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    # optimizer = optim.Adadelta(parameters, params["LEARNING_RATE"])
    optimizer = torch.optim.Adam(parameters, lr=lr)
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.85,
                                                           patience=0)
    best_score = 0.0
    start_epoch = 1
    # Data for loss curves plot
    epochs_count = []
    train_losses = []
    valid_losses = []
    # Continuing training from a checkpoint if one was given as argument
    if checkpoint:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]
        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]
    # Compute loss and accuracy before starting (or resuming) training.
    _, valid_loss, valid_accuracy, _, = validate(model, dev_loader, criterion)
    print("\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%".
          format(valid_loss, (valid_accuracy * 100)))
    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training ESIM model on device: {}".format(device),
          20 * "=")
    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)
        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader,
                                                       optimizer, criterion,
                                                       epoch, max_grad_norm)
        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy, _, = validate(
            model, dev_loader, criterion)
        valid_losses.append(epoch_loss)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)
        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            best_score = epoch_accuracy
            patience_counter = 0

            if (if_save_model):
                torch.save(
                    {
                        "epoch": epoch,
                        "model": model.state_dict(),
                        "best_score": best_score,
                        "epochs_count": epochs_count,
                        "train_losses": train_losses,
                        "valid_losses": valid_losses
                    }, os.path.join(target_dir, "best.pth.tar"))

                print("save model succesfully!\n")

            print("* Test for epoch {}:".format(epoch))
            _, _, test_accuracy, predictions = validate(
                model, test_loader, criterion)
            print("Test accuracy: {:.4f}%\n".format(test_accuracy))
            test_prediction = pd.DataFrame({'prediction': predictions})
            test_prediction.to_csv(os.path.join(target_dir,
                                                "test_prediction.csv"),
                                   index=False)

        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break
コード例 #6
0
ファイル: train.py プロジェクト: xhjcxxl/qianyan
def main(args):
    print(20 * "=", " Preparing for training ", 20 * "=")
    # 保存模型的路径
    if not os.path.exists(args.target_dir):
        os.makedirs(args.target_dir)

    # -------------------- Data loading ------------------- #
    print("\t* Loading training data...")
    # train_data = LCQMC_dataset(args.train_file, args.vocab_file, args.max_length, test_flag=False)
    train_data = LCQMC_dataset(args.train_file, args.vocab_file, args.max_length, test_flag=False)
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True)
    print("\t* Loading valid data...")
    dev_data = LCQMC_dataset(args.dev_file, args.vocab_file, args.max_length, test_flag=False)
    dev_loader = DataLoader(dev_data, batch_size=args.batch_size, shuffle=True)
    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    embeddings = load_embeddings(args.embed_file)
    model = ESIM(args, embeddings=embeddings).to(args.device)

    # -------------------- Preparation for training  ------------------- #
    criterion = nn.CrossEntropyLoss()  # 交叉熵损失函数
    # 过滤出需要梯度更新的参数
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=args.lr)  # 优化器
    # 学习计划
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max',
                                                           factor=0.85, patience=0)

    best_score = 0.0
    start_epoch = 1

    epochs_count = []
    train_losses = []
    valid_losses = []
    # Continuing training from a checkpoint if one was given as argument
    if args.checkpoint:
        # 从文件中加载checkpoint数据, 从而继续训练模型
        checkpoints = torch.load(args.checkpoint)
        start_epoch = checkpoints["epoch"] + 1
        best_score = checkpoints["best_score"]
        print("\t* Training will continue on existing model from epoch {}...".format(start_epoch))
        model.load_state_dict(checkpoints["model"])  # 模型部分
        optimizer.load_state_dict(checkpoints["optimizer"])
        epochs_count = checkpoints["epochs_count"]
        train_losses = checkpoints["train_losses"]
        valid_losses = checkpoints["valid_losses"]

        # 这里改为只有从以前加载的checkpoint中才进行计算 valid, Compute loss and accuracy before starting (or resuming) training.
        _, valid_loss, valid_accuracy, auc = validate(model, dev_loader, criterion)
        print("\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%, auc: {:.4f}"
              .format(valid_loss, (valid_accuracy * 100), auc))
    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training ESIM model on device: {}".format(args.device), 20 * "=")
    patience_counter = 0

    for epoch in range(start_epoch, args.epochs + 1):
        epochs_count.append(epoch)
        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader, optimizer,
                                                       criterion, epoch, args.max_grad_norm)
        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%"
              .format(epoch_time, epoch_loss, (epoch_accuracy * 100)))

        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy, epoch_auc = validate(model, train_loader, criterion)
        valid_losses.append(epoch_loss)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%, auc: {:.4f}\n"
              .format(epoch_time, epoch_loss, (epoch_accuracy * 100), epoch_auc))
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)
        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            best_score = epoch_accuracy
            patience_counter = 0
            # 保存最好的结果,需要保存的参数,这些参数在checkpoint中都能找到
            torch.save(
                {
                    "epoch": epoch,
                    "model": model.state_dict(),
                    "best_score": best_score,
                    "epochs_count": epochs_count,
                    "train_losses": train_losses,
                    "valid_losses": valid_losses},
                os.path.join(args.target_dir, "new_best.pth.tar"))
        # 保存每个epoch的结果 Save the model at each epoch.(这里可要可不要)
        torch.save(
            {
                "epoch": epoch,
                "model": model.state_dict(),
                "best_score": best_score,
                "optimizer": optimizer.state_dict(),
                "epochs_count": epochs_count,
                "train_losses": train_losses,
                "valid_losses": valid_losses},
            os.path.join(args.target_dir, "new_esim_{}.pth.tar".format(epoch)))

        if patience_counter >= args.patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break
コード例 #7
0
def main(test_file, pretrained_file, embeddings_file, batch_size=1):
    """
    Test the ESIM model with pretrained weights on some dataset.

    Args:
        test_file: The path to a file containing preprocessed NLI data.
        pretrained_file: The path to a checkpoint produced by the
            'train_model' script.
        vocab_size: The number of words in the vocabulary of the model
            being tested.
        embedding_dim: The size of the embeddings in the model.
        hidden_size: The size of the hidden layers in the model. Must match
            the size used during training. Defaults to 300.
        num_classes: The number of classes in the output of the model. Must
            match the value used during training. Defaults to 3.
        batch_size: The size of the batches used for testing. Defaults to 32.
    """
    debug_file = open('test_debug.txt', 'w')
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    #device = torch.device("cpu")

    print(20 * "=", " Preparing for generating representations ", 20 * "=")

    checkpoint = torch.load(pretrained_file)

    # Retrieving model parameters from checkpoint.
    vocab_size = checkpoint["model"]["_word_embedding.weight"].size(0)
    embedding_dim = checkpoint["model"]['_word_embedding.weight'].size(1)
    hidden_size = checkpoint["model"]["_projection.0.weight"].size(0)
    num_classes = checkpoint["model"]["_classification.4.weight"].size(0)

    print("\t* Loading the data...")
    with open(test_file, "rb") as pkl:
        test_data = NLIDataset(pickle.load(pkl))
    print(test_data, file=debug_file)

    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

    print("\t* Building model...")

    # loading the embedding weights separately
    # with open(embeddings_file, "rb") as pkl:
    pkl = open(embeddings_file, "rb")
    embeddings = torch.tensor(pickle.load(pkl), dtype=torch.float)\
                 .to(device)
    pkl.close()

    # model = ESIM(vocab_size,
    #              embedding_dim,
    #              hidden_size,
    #              num_classes=num_classes,
    #              device=device).to(device)
    model = ESIM(embeddings.shape[0],
                 embeddings.shape[1],
                 hidden_size,
                 embeddings=embeddings,
                 num_classes=num_classes,
                 device=device).to(device)
    # Writing custom load_state_dict
    pretrained_dict = checkpoint["model"]
    own_state = model.state_dict()
    for i, (name, param) in enumerate(pretrained_dict.items()):
        #print(name, type(name))
        # if name is "_word_embedding.weight":
        #     print(name)
        #     continue
        if i == 0:
            continue
        if isinstance(param, Parameter):
            # backwards compatibility for serialized parameters
            param = param.data
        own_state[name].copy_(param)

    #model.load_state_dict(checkpoint["model"])

    print(
        20 * "=",
        " Loading the representations from ESIM model on device: {} ".format(
            device), 20 * "=")
    batch_time, total_time, save_rep = test(model, test_loader)
    print("-> Average batch processing time: {:.4f}s, total test time:\
 {:.4f}s,%".format(batch_time, total_time))
    file_debug = open('test_save_rep_details.txt', 'w')
    print('len of save_rep is' + str(len(save_rep)), file=file_debug)
    try:
        print('save_rep sample key is' + str(list(save_rep.keys())[0]),
              file=file_debug)
        print('save_rep sample value is' + str(list(save_rep.values())[0]),
              file=file_debug)
    except:
        pass

    # Dump save_rep as a pickle file
    with open('test_nv_repr.pickle', 'wb') as handle:
        pickle.dump(save_rep, handle, protocol=pickle.HIGHEST_PROTOCOL)
コード例 #8
0
def main(train_q1_file,
         train_q2_file,
         train_labels_file,
         dev_q1_file,
         dev_q2_file,
         dev_labels_file,
         embeddings_file,
         target_dir,
         hidden_size=128,
         dropout=0.5,
         num_classes=2,
         epochs=15,
         batch_size=64,
         lr=0.001,
         patience=5,
         max_grad_norm=10.0,
         gpu_index=0,
         checkpoint=None):

    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")

    print(20 * "=", " Preparing for training ", 20 * "=")

    # 保存模型的路径
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # -------------------- Data loading ------------------- #
    print("\t* Loading training data...")
    train_q1 = np.load(train_q1_file)
    train_q2 = np.load(train_q2_file)
    train_labels = np.load(train_labels_file)
    #    train_labels = label_transformer(train_labels)

    train_data = {"q1": train_q1, "q2": train_q2, "labels": train_labels}

    train_data = QQPDataset(train_data)
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

    print("\t* Loading validation data...")
    dev_q1 = np.load(dev_q1_file)
    dev_q2 = np.load(dev_q2_file)
    dev_labels = np.load(dev_labels_file)
    #    dev_labels = label_transformer(dev_labels)

    dev_data = {"q1": dev_q1, "q2": dev_q2, "labels": dev_labels}

    dev_data = QQPDataset(dev_data)
    dev_loader = DataLoader(dev_data, shuffle=True, batch_size=batch_size)

    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    embeddings = torch.tensor(np.load(embeddings_file),
                              dtype=torch.float).to(device)

    model = ESIM(embeddings.shape[0],
                 embeddings.shape[1],
                 hidden_size,
                 embeddings=embeddings,
                 dropout=dropout,
                 num_classes=num_classes,
                 device=device).to(device)

    # -------------------- Preparation for training  ------------------- #
    criterion = nn.CrossEntropyLoss()
    # 过滤出需要梯度更新的参数
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    # optimizer = optim.Adadelta(parameters, params["LEARNING_RATE"])
    optimizer = torch.optim.Adam(parameters, lr=lr)
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.85,
                                                           patience=0)

    best_score = 0.0
    start_epoch = 1

    # Data for loss curves plot
    epochs_count = []
    train_losses = []
    valid_losses = []

    # Continuing training from a checkpoint if one was given as argument
    if checkpoint:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]

        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))

        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]

    # Compute loss and accuracy before starting (or resuming) training.
    _, valid_loss, valid_accuracy = validate(model, dev_loader, criterion)
    print("\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%".
          format(valid_loss, (valid_accuracy * 100)))

    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training ESIM model on device: {}".format(device),
          20 * "=")

    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)

        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader,
                                                       optimizer, criterion,
                                                       epoch, max_grad_norm)

        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))

        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = validate(
            model, dev_loader, criterion)

        valid_losses.append(epoch_loss)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))

        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)

        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            best_score = epoch_accuracy
            patience_counter = 0
            # Save the best model. The optimizer is not saved to avoid having
            # a checkpoint file that is too heavy to be shared. To resume
            # training from the best model, use the 'esim_*.pth.tar'
            # checkpoints instead.
            torch.save(
                {
                    "epoch": epoch,
                    "model": model.state_dict(),
                    "best_score": best_score,
                    "epochs_count": epochs_count,
                    "train_losses": train_losses,
                    "valid_losses": valid_losses
                }, os.path.join(target_dir, "best.pth.tar"))

        # Save the model at each epoch.
        torch.save(
            {
                "epoch": epoch,
                "model": model.state_dict(),
                "best_score": best_score,
                "optimizer": optimizer.state_dict(),
                "epochs_count": epochs_count,
                "train_losses": train_losses,
                "valid_losses": valid_losses
            }, os.path.join(target_dir, "esim_{}.pth.tar".format(epoch)))

        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break