Example #1
    def __init__(self):
        # path
        root_path = get_root_path()
        config_file = os.path.join(
            root_path,
            "conf/{}".format(SlotWithBertPredictor.default_model_config),
        )

        # load config
        config = json.load(open(config_file))
        data_path = os.path.join(root_path, config["data_dir"])
        device = config["DEVICE"]

        # load intent, tag vocabulary and dataloader
        intent_vocab = json.load(
            open(os.path.join(data_path, "intent_vocab.json"),
                 encoding="utf-8"))
        tag_vocab = json.load(
            open(os.path.join(data_path, "tag_vocab.json"), encoding="utf-8"))
        dataloader = Dataloader(
            tag_vocab=tag_vocab,
            intent_vocab=intent_vocab,
            pretrained_weights=config["model"]["pretrained_weights"],
        )
        # load best model
        best_model_path = os.path.join(
            DEFAULT_MODEL_PATH, SlotWithBertPredictor.default_model_name)
        if not os.path.exists(best_model_path):
            download_from_url(SlotWithBertPredictor.default_model_url,
                              best_model_path)
        model = SlotWithBert(config["model"], device, dataloader.tag_dim)
        try:
            model.load_state_dict(
                torch.load(best_model_path, map_location="cpu"))
        except Exception as e:
            print(e)
        model.to(device)
        model.eval()

        self.model = model
        self.dataloader = dataloader
        print(f"{best_model_path} loaded")
Example #2
    def __init__(self):
        # path
        root_path = get_root_path()

        config_file = os.path.join(
            get_config_path(), IntentWithBertPredictor.default_model_config)

        # load config
        config = json.load(open(config_file))
        self.device = config["DEVICE"]

        # load intent vocabulary and dataloader
        intent_vocab = json.load(
            open(
                os.path.join(get_data_path(),
                             "crosswoz/nlu_intent_data/intent_vocab.json"),
                encoding="utf-8",
            ))
        dataloader = Dataloader(
            intent_vocab=intent_vocab,
            pretrained_weights=config["model"]["pretrained_weights"],
        )
        # load best model
        best_model_path = os.path.join(root_path, DEFAULT_MODEL_PATH,
                                       IntentWithBertPredictor.default_model_name)
        # best_model_path = os.path.join(DEFAULT_MODEL_PATH, IntentWithBertPredictor.default_model_name)
        if not os.path.exists(best_model_path):
            download_from_url(IntentWithBertPredictor.default_model_url,
                              best_model_path)
        model = IntentWithBert(config["model"], self.device,
                               dataloader.intent_dim)
        model.load_state_dict(
            torch.load(best_model_path, map_location=self.device))

        model.to(self.device)
        model.eval()
        self.model = model
        self.dataloader = dataloader
        print(f"{best_model_path} loaded - {best_model_path}")
Example #3
def main():

    # load config
    root_path = get_root_path()
    config_path = os.path.join(root_path, "conf/nlu_joint.json")
    config = json.load(open(config_path))
    data_path = config["data_dir"]
    data_path = os.path.join(root_path, data_path)
    output_dir = config["output_dir"]
    output_dir = os.path.join(root_path, output_dir)
    log_dir = config["log_dir"]
    output_dir = os.path.join(root_path, output_dir)
    device = config["DEVICE"]

    set_seed(config["seed"])

    # preprocessing produces the intent_vocab, tag_vocab, train, test and val data files
    # load the intent and tag vocabularies
    intent_vocab = json.load(
        open(os.path.join(data_path, "intent_vocab.json"), encoding="utf-8"))
    tag_vocab = json.load(
        open(os.path.join(data_path, "tag_vocab.json"), encoding="utf-8"))

    dataloader = Dataloader(
        intent_vocab=intent_vocab,
        tag_vocab=tag_vocab,
        pretrained_weights=config["model"]["pretrained_weights"],
    )

    # load the train, val and test data sets
    for data_key in ["train", "val", "test"]:
        dataloader.load_data(
            json.load(
                open(os.path.join(data_path,
                                  "joint_{}_data.json".format(data_key)),
                     encoding="utf-8")),
            data_key,
            cut_sen_len=config["cut_sen_len"],
            use_bert_tokenizer=config["use_bert_tokenizer"],
        )
        print("{} set size: {}".format(data_key,
                                       len(dataloader.data[data_key])))

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    writer = SummaryWriter(log_dir)
    # build the model
    model = JointWithBert(
        config["model"],
        device,
        dataloader.tag_dim,
        dataloader.intent_dim,
        dataloader.intent_weight,
    )
    model.to(device)

    # decide whether to fine-tune BERT
    if config["model"]["finetune"]:
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay) and p.requires_grad
                ],
                "weight_decay":
                config["model"]["weight_decay"],
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay) and p.requires_grad
                ],
                "weight_decay":
                0.0,
            },
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=config["model"]["learning_rate"],
            eps=config["model"]["adam_epsilon"],
        )
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=config["model"]["warmup_steps"],
            num_training_steps=config["model"]["max_step"],
        )
    else:
        for n, p in model.named_parameters():
            if "bert_policy" in n:
                p.requires_grad = False
        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=config["model"]["learning_rate"],
        )

    for name, param in model.named_parameters():
        print(name, param.shape, param.device, param.requires_grad)

    max_step = config["model"]["max_step"]
    check_step = config["model"]["check_step"]
    batch_size = config["model"]["batch_size"]
    model.zero_grad()
    train_slot_loss, train_intent_loss = 0, 0
    best_val_f1 = 0.0

    writer.add_text("config", json.dumps(config))

    for step in range(1, max_step + 1):
        model.train()
        # randomly sample batch_size training examples; get_train_batch calls
        # pad_batch internally, so it returns a 7-tuple of padded tensors
        batched_data = dataloader.get_train_batch(batch_size)
        batched_data = tuple(t.to(device) for t in batched_data)
        (
            word_seq_tensor,
            tag_seq_tensor,
            intent_tensor,
            word_mask_tensor,
            tag_mask_tensor,
            context_seq_tensor,
            context_mask_tensor,
        ) = batched_data
        if not config["model"]["context"]:
            context_seq_tensor, context_mask_tensor = None, None
        _, _, slot_loss, intent_loss = model(
            word_seq_tensor,
            word_mask_tensor,
            tag_seq_tensor,
            tag_mask_tensor,
            intent_tensor,
            context_seq_tensor,
            context_mask_tensor,
        )
        train_slot_loss += slot_loss.item()
        train_intent_loss += intent_loss.item()
        loss = slot_loss + intent_loss  # simply sum the slot and intent losses
        loss.backward()
        # clip gradients to prevent them from exploding
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        if config["model"]["finetune"]:
            # Update learning rate schedule
            scheduler.step()
        model.zero_grad()
        if step % check_step == 0:
            train_slot_loss = train_slot_loss / check_step
            train_intent_loss = train_intent_loss / check_step
            print("[%d|%d] step" % (step, max_step))
            print("\t slot loss:", train_slot_loss)
            print("\t intent loss:", train_intent_loss)

            predict_golden = {"intent": [], "slot": [], "overall": []}

            val_slot_loss, val_intent_loss = 0, 0
            model.eval()
            for pad_batch, ori_batch, real_batch_size in dataloader.yield_batches(
                    batch_size, data_key="val"):
                pad_batch = tuple(t.to(device) for t in pad_batch)
                (
                    word_seq_tensor,
                    tag_seq_tensor,
                    intent_tensor,
                    word_mask_tensor,
                    tag_mask_tensor,
                    context_seq_tensor,
                    context_mask_tensor,
                ) = pad_batch
                if not config["model"]["context"]:
                    context_seq_tensor, context_mask_tensor = None, None

                with torch.no_grad():
                    slot_logits, intent_logits, slot_loss, intent_loss = model.forward(
                        word_seq_tensor,
                        word_mask_tensor,
                        tag_seq_tensor,
                        tag_mask_tensor,
                        intent_tensor,
                        context_seq_tensor,
                        context_mask_tensor,
                    )
                val_slot_loss += slot_loss.item() * real_batch_size
                val_intent_loss += intent_loss.item() * real_batch_size
                for j in range(real_batch_size):
                    predicts = recover_intent(
                        dataloader,
                        intent_logits[j],
                        slot_logits[j],
                        tag_mask_tensor[j],
                        ori_batch[j][0],
                        ori_batch[j][-4],
                    )
                    labels = ori_batch[j][3]

                    predict_golden["overall"].append({
                        "predict": predicts,
                        "golden": labels
                    })
                    predict_golden["slot"].append({
                        "predict": [x for x in predicts if is_slot_da(x)],
                        "golden": [x for x in labels if is_slot_da(x)],
                    })
                    predict_golden["intent"].append({
                        "predict": [x for x in predicts if not is_slot_da(x)],
                        "golden": [x for x in labels if not is_slot_da(x)],
                    })

            for j in range(10):
                writer.add_text(
                    "val_sample_{}".format(j),
                    json.dumps(predict_golden["overall"][j],
                               indent=2,
                               ensure_ascii=False),
                    global_step=step,
                )

            total = len(dataloader.data["val"])
            val_slot_loss /= total
            val_intent_loss /= total
            print("%d samples val" % total)
            print("\t slot loss:", val_slot_loss)
            print("\t intent loss:", val_intent_loss)

            writer.add_scalar("intent_loss/train",
                              train_intent_loss,
                              global_step=step)
            writer.add_scalar("intent_loss/val",
                              val_intent_loss,
                              global_step=step)

            writer.add_scalar("slot_loss/train",
                              train_slot_loss,
                              global_step=step)
            writer.add_scalar("slot_loss/val", val_slot_loss, global_step=step)

            for x in ["intent", "slot", "overall"]:
                precision, recall, F1 = calculate_f1(predict_golden[x])
                print("-" * 20 + x + "-" * 20)
                print("\t Precision: %.2f" % (100 * precision))
                print("\t Recall: %.2f" % (100 * recall))
                print("\t F1: %.2f" % (100 * F1))

                writer.add_scalar("val_{}/precision".format(x),
                                  precision,
                                  global_step=step)
                writer.add_scalar("val_{}/recall".format(x),
                                  recall,
                                  global_step=step)
                writer.add_scalar("val_{}/F1".format(x), F1, global_step=step)

            if F1 > best_val_f1:
                best_val_f1 = F1
                torch.save(model.state_dict(),
                           os.path.join(output_dir, "pytorch_model_nlu.pt"))
                print("best val F1 %.4f" % best_val_f1)
                print("save on", output_dir)

            train_slot_loss, train_intent_loss = 0, 0

    writer.add_text("val overall F1", "%.2f" % (100 * best_val_f1))
    writer.close()
    model_path = os.path.join(output_dir, "pytorch_model_nlu.pt")
    torch.save(model.state_dict(), model_path)
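Note: calculate_f1 is not shown in this listing. A sketch of the micro-averaged dialogue-act F1 it presumably computes over the predict/golden pairs collected above (an assumption about its behavior, not the project's exact code):

def micro_f1(predict_golden):
    # count true positives, false positives and false negatives over dialogue acts
    tp, fp, fn = 0, 0, 0
    for item in predict_golden:
        predicts, labels = item["predict"], item["golden"]
        for act in predicts:
            if act in labels:
                tp += 1
            else:
                fp += 1
        for act in labels:
            if act not in predicts:
                fn += 1
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    if precision + recall:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0
    return precision, recall, f1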
Example #4
    calculate_f1,
)

import torch
import numpy as np


def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


if __name__ == "__main__":
    # path
    root_path = get_root_path()
    config_file = os.path.join(get_config_path(),
                               IntentWithBertPredictor.default_model_config)
    config = json.load(open(config_file))
    data_dir = os.path.join(get_data_path(), "crosswoz/nlu_intent_data/")
    output_dir = config["output_dir"]
    output_dir = os.path.join(root_path, output_dir)
    log_dir = config["log_dir"]
    log_dir = os.path.join(root_path, log_dir)
    device = config["DEVICE"]

    set_seed(config["seed"])

    intent_vocab = json.load(
        open(os.path.join(data_dir, "intent_vocab.json"), encoding="utf-8"))
    dataloader = Dataloader(
        intent_vocab=intent_vocab,
        pretrained_weights=config["model"]["pretrained_weights"],
    )
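Note: the set_seed above only seeds Python, NumPy and the CPU RNG. A sketch of a fuller version for GPU runs (the CUDA seeding and cuDNN flags are an assumption, not part of the original script):

import random

import numpy as np
import torch


def set_seed(seed):
    # seed Python, NumPy and PyTorch on the CPU and on every visible GPU
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # deterministic cuDNN kernels trade some speed for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False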
Example #5
def main():
    # data_urls = {
    #     "intent_train_data.json": "http://xbot.bslience.cn/intent_train_data.json",
    #     "intent_val_data.json": "http://xbot.bslience.cn/intent_val_data.json",
    #     "intent_test_data.json": "http://xbot.bslience.cn/intent_test_data.json",
    # }
    # load config
    root_path = get_root_path()
    config_path = os.path.join(get_config_path(), "nlu_intent.json")
    config = json.load(open(config_path))
    data_path = config["data_dir"]
    data_path = os.path.join(root_path, data_path)
    output_dir = config["output_dir"]
    output_dir = os.path.join(root_path, output_dir)
    log_dir = config["log_dir"]
    log_dir = os.path.join(root_path, log_dir)
    device = config["DEVICE"]

    # download data
    # for data_key, url in data_urls.items():
    #     dst = os.path.join(os.path.join(data_path, data_key))
    #     if not os.path.exists(dst):
    #         download_from_url(url, dst)

    # seed
    set_seed(config["seed"])

    # load intent vocabulary and dataloader
    intent_vocab = json.load(
        open(os.path.join(data_path, "intent_vocab.json"), encoding="utf-8"))
    dataloader = Dataloader(
        intent_vocab=intent_vocab,
        pretrained_weights=config["model"]["pretrained_weights"],
    )

    # load data
    for data_key in ["train", "val", "test"]:
        dataloader.load_data(
            json.load(
                open(
                    os.path.join(data_path,
                                 "intent_{}_data.json".format(data_key)),
                    encoding="utf-8",
                )),
            data_key,
            cut_sen_len=config["cut_sen_len"],
            use_bert_tokenizer=config["use_bert_tokenizer"],
        )
        print("{} set size: {}".format(data_key,
                                       len(dataloader.data[data_key])))

    # output and log dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    writer = SummaryWriter(log_dir)

    # model
    model = IntentWithBert(config["model"], device, dataloader.intent_dim,
                           dataloader.intent_weight)
    model.to(device)

    # optimizer and scheduler
    if config["model"]["finetune"]:
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay) and p.requires_grad
                ],
                "weight_decay":
                config["model"]["weight_decay"],
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay) and p.requires_grad
                ],
                "weight_decay":
                0.0,
            },
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=config["model"]["learning_rate"],
            eps=config["model"]["adam_epsilon"],
        )
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=config["model"]["warmup_steps"],
            num_training_steps=config["model"]["max_step"],
        )
    else:
        for n, p in model.named_parameters():
            if "bert_policy" in n:
                p.requires_grad = False
        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=config["model"]["learning_rate"],
        )

    max_step = config["model"]["max_step"]
    check_step = config["model"]["check_step"]
    batch_size = config["model"]["batch_size"]
    model.zero_grad()
    train_intent_loss = 0
    best_val_f1 = 0.0

    writer.add_text("config", json.dumps(config))

    for step in range(1, max_step + 1):
        model.train()
        batched_data = dataloader.get_train_batch(batch_size)
        batched_data = tuple(t.to(device) for t in batched_data)
        word_seq_tensor, word_mask_tensor, intent_tensor = batched_data
        intent_logits, intent_loss = model.forward(word_seq_tensor,
                                                   word_mask_tensor,
                                                   intent_tensor)

        train_intent_loss += intent_loss.item()
        loss = intent_loss
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        if config["model"]["finetune"]:
            scheduler.step()  # Update learning rate schedule

        model.zero_grad()
        if step % check_step == 0:
            train_intent_loss = train_intent_loss / check_step
            print("[%d|%d] step" % (step, max_step))
            print("\t intent loss:", train_intent_loss)

            predict_golden = {"intent": []}

            val_intent_loss = 0
            model.eval()
            for pad_batch, ori_batch, real_batch_size in dataloader.yield_batches(
                    batch_size, data_key="val"):
                pad_batch = tuple(t.to(device) for t in pad_batch)
                word_seq_tensor, word_mask_tensor, intent_tensor = pad_batch

                with torch.no_grad():
                    intent_logits, intent_loss = model.forward(
                        word_seq_tensor, word_mask_tensor, intent_tensor)

                val_intent_loss += intent_loss.item() * real_batch_size
                for j in range(real_batch_size):
                    predicts = recover_intent(dataloader, intent_logits[j])
                    labels = ori_batch[j][1]

                    predict_golden["intent"].append({
                        "predict": [x for x in predicts],
                        "golden": [x for x in labels],
                    })

            total = len(dataloader.data["val"])
            val_intent_loss /= total
            print("%d samples val" % total)
            print("\t intent loss:", val_intent_loss)

            writer.add_scalar("intent_loss/train",
                              train_intent_loss,
                              global_step=step)
            writer.add_scalar("intent_loss/val",
                              val_intent_loss,
                              global_step=step)

            for x in ["intent"]:
                precision, recall, F1 = calculate_f1(predict_golden[x])
                print("-" * 20 + x + "-" * 20)
                print("\t Precision: %.2f" % (100 * precision))
                print("\t Recall: %.2f" % (100 * recall))
                print("\t F1: %.2f" % (100 * F1))

                writer.add_scalar("val_{}/precision".format(x),
                                  precision,
                                  global_step=step)
                writer.add_scalar("val_{}/recall".format(x),
                                  recall,
                                  global_step=step)
                writer.add_scalar("val_{}/F1".format(x), F1, global_step=step)

            if F1 > best_val_f1:
                best_val_f1 = F1
                torch.save(
                    model.state_dict(),
                    os.path.join(output_dir,
                                 "pytorch-intent-with-bert_policy.pt"),
                )
                print("best val F1 %.4f" % best_val_f1)
                print("save on", output_dir)

            train_intent_loss = 0

    writer.add_text("val intent F1", "%.2f" % (100 * best_val_f1))
    writer.close()

    # path of the saved model
    model_path = os.path.join(output_dir,
                              "pytorch-intent-with-bert_policy.pt")
    zip_path = config["zipped_model_path"]
    zip_path = os.path.join(root_path, zip_path)
    print("zip model to", zip_path)

    # write the zipped model archive
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.write(model_path)
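Note: zf.write(model_path) stores the model under its full on-disk path inside the archive. A small sketch that keeps the archive flat via arcname (the zip_model helper name is ours; only the stored path differs from the original behavior):

import os
import zipfile


def zip_model(model_path, zip_path):
    # store only the file name inside the archive, not its absolute path
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.write(model_path, arcname=os.path.basename(model_path))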