Example #1
    def __init__(self):
        super(BertPolicy, self).__init__()
        # load config
        infer_config = self.load_config()

        # download data
        model_dir = os.path.join(infer_config["data_path"], "trained_model")
        # model_dir = os.path.join('/xhp/src/xbot/output/policy/bert', 'Epoch-19-f1-0.903')
        infer_config["model_dir"] = model_dir
        self.download_data(infer_config, model_dir)
        # Must stay consistent with the ontology used during training, otherwise the label order will not match. TODO: sort act_ontology at training time.
        self.act_ontology = load_json(infer_config["act_ontology"])
        self.num_act = len(self.act_ontology)

        model_config = BertConfig.from_pretrained(infer_config["model_dir"])
        model_config.num_labels = self.num_act
        self.model = BertForSequenceClassification.from_pretrained(
            infer_config["model_dir"], config=model_config)
        self.tokenizer = BertTokenizer.from_pretrained(
            infer_config["model_dir"])

        self.model.eval()
        self.model.to(infer_config["device"])

        self.db = Database()
        self.config = infer_config
        self.threshold = infer_config["threshold"]
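The constructor above only loads the fine-tuned classifier; it does not show how a prediction is made. Below is a minimal sketch of how an inference pass could use the loaded model, tokenizer, threshold and act_ontology. The method name predict_acts and the sigmoid multi-label decoding are assumptions for illustration, not part of the original class.

    def predict_acts(self, utterance: str) -> List[str]:
        """Hypothetical helper: return every dialogue act scored above the threshold."""
        inputs = self.tokenizer(
            utterance, return_tensors="pt", truncation=True, max_length=512
        )
        inputs = {k: v.to(self.config["device"]) for k, v in inputs.items()}

        with torch.no_grad():
            logits = self.model(**inputs)[0]  # shape: (1, num_act)

        # Multi-label decoding: keep every act whose probability clears the threshold.
        probs = torch.sigmoid(logits).squeeze(0)
        return [
            act
            for act, prob in zip(self.act_ontology, probs.tolist())
            if prob >= self.threshold
        ]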
Example #2
    @staticmethod
    def load_config() -> dict:
        """Load config for inference.

        Returns:
            config dict
        """
        common_config_path = os.path.join(get_config_path(),
                                          BertPolicy.common_config_name)
        infer_config_path = os.path.join(get_config_path(),
                                         BertPolicy.inference_config_name)
        common_config = load_json(common_config_path)
        infer_config = load_json(infer_config_path)
        infer_config.update(common_config)
        infer_config["device"] = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        infer_config["data_path"] = os.path.join(get_data_path(),
                                                 "crosswoz/policy_bert_data")
        if not os.path.exists(infer_config["data_path"]):
            os.makedirs(infer_config["data_path"])
        return infer_config
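Note the merge direction: infer_config.update(common_config) means that a key defined in both files ends up with the value from the common config, while keys that exist only in the inference config are kept. A small standalone illustration of this dict.update behavior (the keys and values below are made up for the example):

common_config = {"threshold": 0.4, "act_ontology": "act_ontology.json"}
infer_config = {"threshold": 0.5, "model_dir": "trained_model"}

infer_config.update(common_config)

print(infer_config["threshold"])  # 0.4 -- the shared key takes the common-config value
print(infer_config["model_dir"])  # trained_model -- keys unique to infer_config survive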
Example #3
def load_act_ontology() -> Tuple[List[str], int]:
    """Load action ontology from cache.

    Returns:
        action ontology and number of actions
    """
    act_ontology = load_json(
        os.path.join(get_data_path(),
                     "crosswoz/policy_bert_data/act_ontology.json"))
    num_act = len(act_ontology)
    return act_ontology, num_act
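Per the return annotation, act_ontology is a plain list of act strings whose order defines the classifier's label ids. A short sketch of how a caller might build label/index mappings from it (the names id2act and act2id are illustrative, not from the project):

act_ontology, num_act = load_act_ontology()

# index -> act string (the list order is the label-id order)
id2act = dict(enumerate(act_ontology))
# act string -> index, e.g. for building multi-hot training targets
act2id = {act: idx for idx, act in enumerate(act_ontology)}

assert len(act2id) == num_act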
Example #4
File: train.py  Project: zy12105228/xbot
    def load_data(self, data_type: str) -> DataLoader:
        """Load data from data cache or build from scratch.

        Args:
            data_type: train, dev or test

        Returns:
            DataLoader, see torch.utils.data.DataLoader
        """
        raw_data_path = os.path.join(
            self.config["raw_data_path"], f"{data_type}.json.zip"
        )
        filename = f"{data_type}.json"
        output_path = os.path.join(self.config["data_path"], filename)
        if not os.path.exists(output_path) or not self.config["use_data_cache"]:
            examples = preprocess(raw_data_path, output_path, filename)
        else:
            print(f"Loading {data_type} data from cache ...")
            examples = load_json(output_path)

        if self.config["debug"]:
            examples = random.sample(examples, k=int(len(examples) * 0.1))
        examples_dict = self.get_input_ids(examples)

        print(f"Starting building {data_type} dataset ...")
        dataset = PolicyDataset(**examples_dict)
        shuffle = data_type == "train"
        collate = partial(collate_fn, mode=data_type)
        batch_size = self.config[f"{data_type}_batch_size"]
        dataloader = DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            num_workers=self.config["num_workers"],
            shuffle=shuffle,
            pin_memory=True,
            collate_fn=collate,
        )
        return dataloader
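The call partial(collate_fn, mode=data_type) pins the mode argument so that the DataLoader can keep calling the collate function with a single batch argument. A self-contained sketch of that pattern; the toy collate_fn below is only illustrative, not the project's real one:

from functools import partial

import torch
from torch.utils.data import DataLoader, TensorDataset


def collate_fn(batch, mode="train"):
    """Toy collate: stack features, and drop labels outside of training."""
    xs = torch.stack([x for x, _ in batch])
    ys = torch.stack([y for _, y in batch])
    return (xs, ys) if mode == "train" else xs


dataset = TensorDataset(torch.randn(8, 4), torch.zeros(8, dtype=torch.long))
loader = DataLoader(dataset, batch_size=4, collate_fn=partial(collate_fn, mode="train"))

for xs, ys in loader:
    print(xs.shape, ys.shape)  # torch.Size([4, 4]) torch.Size([4])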