Example #1
# Assumed module-level context: DATASET, BATCH_SIZE, EMBEDDING_DIM, HIDDEN_DIM,
# DROPOUT, LEARN_RATE, LOG_INTERVAL, EPOCHS and PATIENCE are configuration
# constants, and NERDataset, BatchPadding, BiLSTM_CRF, load_obj and evaluate
# come from the surrounding project.
import os

import torch
from torch import optim
from torch.utils.data import DataLoader


def my_train():
    os.makedirs(f"model_result", exist_ok=True)
    torch.manual_seed(1)
    device = torch.device('cuda')

    data_dir = f"data/{DATASET}/processed"

    # Load the preprocessed train/test/dev splits
    train_data = NERDataset(os.path.join(data_dir, "train.pkl"))
    test_data = NERDataset(os.path.join(data_dir, "test.pkl"))
    dev_data = NERDataset(os.path.join(data_dir, "dev.pkl"))

    word_to_idx = load_obj(os.path.join(data_dir, "word_to_idx.pkl"))
    tag_to_idx = load_obj(os.path.join(data_dir, "tag_to_idx.pkl"))

    idx_to_tag = {n: m for m, n in tag_to_idx.items()}

    train_loader = DataLoader(
        train_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    dev_loader = DataLoader(
        dev_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    test_loader = DataLoader(
        test_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )

    # Build the model
    model = BiLSTM_CRF(len(word_to_idx), len(tag_to_idx), EMBEDDING_DIM,
                       HIDDEN_DIM, DROPOUT).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=LEARN_RATE)

    print("\n开始训练")
    f1_max = 0
    cur_patience = 0  # early-stopping counter used to curb overfitting
    for epoch in range(EPOCHS):
        model.train()
        for i, (seqs, tags, masks) in enumerate(train_loader, 1):
            optimizer.zero_grad()
            loss = model.loss(seqs.to(device), tags.to(device),
                              masks.to(device))
            loss.backward()
            optimizer.step()
            if i % LOG_INTERVAL == 0:
                print("epoch {}: {:.0f}%\t\tLoss: {:.6f}".format(
                    epoch, 100.0 * i / len(train_loader), loss.item()))
        dev_precision, dev_recall, dev_f1 = evaluate(model, dev_loader,
                                                     idx_to_tag)
        test_precision, test_recall, test_f1 = evaluate(
            model, test_loader, idx_to_tag)
        print(
            f"\ndev\tprecision: {dev_precision}, recall: {dev_recall}, f1: {dev_f1}"
        )
        print(
            f"test\tprecision: {test_precision}, recall: {test_recall}, f1: {test_f1}\n"
        )

        torch.save(model.state_dict(), f"model_result/{epoch}.pt")

        if dev_f1 > f1_max:  # track the best dev F1 to detect overfitting
            f1_max = dev_f1
            cur_patience = 0
            if dev_f1 > 0.9 and test_f1 > 0.9:
                break
        else:
            cur_patience += 1
            if cur_patience >= PATIENCE:  # no dev-F1 improvement for PATIENCE epochs, stop
                break
    print("Best dev F1: ", f1_max)
Example #2
    # Model
    model = BiLSTM_CRF(
        len(word_to_ix), len(tag_to_ix), args.embed_dim, args.hidden_dim, args.dropout
    ).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    print("Training...")
    best_dev_f1 = 0
    bad_count = 0
    for epoch in range(args.epochs):
        model.train()
        for i, (seqs, tags, masks) in enumerate(train_loader, 1):
            optimizer.zero_grad()
            loss = model.loss(seqs.to(device), tags.to(device), masks.to(device))
            loss.backward()
            optimizer.step()
            if i % args.log_interval == 0:
                print(
                    "Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                        epoch + 1,
                        i * seqs.size(1),
                        len(train_loader.dataset),
                        100.0 * i / len(train_loader),
                        loss.item(),
                    )
                )
        print("Evaluating...")
        dev_precision, dev_recall, dev_f1 = evaluate(model, dev_loader, ix_to_tag)
        test_precision, test_recall, test_f1 = evaluate(model, test_loader, ix_to_tag)
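
Both examples also rely on an evaluate helper that returns precision, recall, and F1 but do not show it. A hedged sketch of such a helper follows, assuming the model's forward pass decodes one tag-index sequence per sentence (a common BiLSTM-CRF interface), sequence-first tensors, and seqeval for entity-level scoring; the decoding call and tensor layout are assumptions, not the examples' actual API.

import torch
from seqeval.metrics import precision_score, recall_score, f1_score


def evaluate(model, data_loader, idx_to_tag):
    """Sketch: decode every batch and score the predicted tag sequences
    against the gold ones with entity-level precision/recall/F1."""
    model.eval()
    y_true, y_pred = [], []
    device = next(model.parameters()).device
    with torch.no_grad():
        for seqs, tags, masks in data_loader:
            seqs, masks = seqs.to(device), masks.to(device)
            # Assumed decoding API: the model returns one list of tag
            # indices per sentence in batch order.
            paths = model(seqs, masks)
            # Tensors are assumed to be (seq_len, batch); transpose to
            # iterate sentence by sentence and drop the padding.
            for path, gold, mask in zip(paths, tags.t(), masks.t()):
                length = int(mask.sum())
                y_pred.append([idx_to_tag[int(i)] for i in path[:length]])
                y_true.append([idx_to_tag[int(i)] for i in gold[:length]])
    return (precision_score(y_true, y_pred),
            recall_score(y_true, y_pred),
            f1_score(y_true, y_pred))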