Example #1
 def __get_params(self):
     """
     __get_params
     """
     config_dir = _get_abs_path("config")
     param_path = os.path.join(config_dir, 'infer.json')
     param_dict = from_file(param_path)
     self._params = replace_none(param_dict)
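These examples all follow the same pattern: read a JSON config with from_file and normalize it with replace_none before pulling sections out of the resulting dict. The framework ships its own implementations of both helpers; the sketch below is only a minimal stand-in, assuming from_file parses JSON into a dict and replace_none recursively maps the literal string "None" to Python None.

import json

def from_file(path):
    # Hypothetical stand-in: parse a JSON config file into a plain dict.
    with open(path, "r", encoding="utf-8") as fp:
        return json.load(fp)

def replace_none(params):
    # Hypothetical stand-in: recursively turn the literal string "None"
    # into Python None so later .get() lookups behave as expected.
    if params == "None":
        return None
    if isinstance(params, dict):
        return {key: replace_none(value) for key, value in params.items()}
    if isinstance(params, list):
        return [replace_none(item) for item in params]
    return params
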
Example #2
 def init_data_params(self):
     """
     :return:
     """
     model_path = self.param["inference_model_path"]
     data_params_path = model_path + "/infer_data_params.json"
     param_dict = params.from_file(data_params_path)
     param_dict = params.replace_none(param_dict)
     self.input_keys = param_dict.get("fields")
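Example #2 (and step 3 of Example #4) expects infer_data_params.json, next to the exported inference model, to expose a "fields" key listing the model's input slots. A minimal sketch of writing such a file from Python; the field names and the model path here are illustrative, the real ones come with the exported model.

import json
import os

# Illustrative only: real field names depend on the exported inference model.
data_params = {"fields": ["text_a"]}

model_path = "output/save_inference_model"  # hypothetical inference_model_path
os.makedirs(model_path, exist_ok=True)
with open(os.path.join(model_path, "infer_data_params.json"), "w", encoding="utf-8") as fp:
    json.dump(data_params, fp, ensure_ascii=False, indent=2)
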
Example #3
    def train(self, json_path):
        """
        the function use to retrain model
        :param model_save_dir: where to saving model after training
        """
        param_dict = params.from_file(json_path)
        _params = params.replace_none(param_dict)
        register.import_modules()

        dataset_reader_params_dict = _params.get("dataset_reader")
        dataset_reader = dataset_reader_from_params(dataset_reader_params_dict)

        model_params_dict = _params.get("model")
        model, num_train_examples = model_from_params(model_params_dict)

        trainer_params_dict = _params.get("trainer")
        trainer = build_trainer(trainer_params_dict, dataset_reader, model, num_train_examples)

        trainer.train_and_eval()
        logging.info("end of run train and eval .....")
Example #4
    def init_model(self,
                   model_class="ernie_1.0_skep_large_ch",
                   task="sentiment_classify",
                   use_cuda=False):
        """
        init_model
        """
        ptm = self._params.get("model_name").get(model_class)
        ptm_id = ptm.get('type')
        task_id = self._params.get("task_name").get(task)
        model_dict = self._params.get("model_class").get(ptm_id + task_id)

        # step 1: download the model data if it is not already available locally
        data_url = model_dict.get("model_file_http_url")
        md5_url = model_dict.get("model_md5_http_url")
        is_download_data = download_data(data_url, md5_url)

        # step 2: look up the model class in the registry and instantiate it
        register.import_modules()
        model_name = model_dict.get("type")
        self.model_class = RegisterSet.models[model_name](model_dict)

        # step 3: init data params (read the model's input field names)
        model_path = _get_abs_path(model_dict.get("inference_model_path"))
        data_params_path = model_path + "/infer_data_params.json"
        param_dict = from_file(data_params_path)
        param_dict = replace_none(param_dict)
        self.input_keys = param_dict.get("fields")

        # step 4: init env (load the inference model)
        self.inference = self.__load_inference_model(model_path, use_cuda)

        # step 5: build the tokenizer
        tokenizer_info = model_dict.get("predict_reader").get('tokenizer')
        tokenizer_name = tokenizer_info.get('type')
        tokenizer_vocab_path = _get_abs_path(tokenizer_info.get('vocab_path'))
        tokenizer_params = None
        if "params" in tokenizer_info:
            tokenizer_params = tokenizer_info.get("params")
            bpe_v_file = tokenizer_params["bpe_vocab_file"]
            bpe_j_file = tokenizer_params["bpe_json_file"]
            tokenizer_params["bpe_vocab_file"] = _get_abs_path(bpe_v_file)
            tokenizer_params["bpe_json_file"] = _get_abs_path(bpe_j_file)

        tokenizer_class = RegisterSet.tokenizer[tokenizer_name]
        self.tokenizer = tokenizer_class(vocab_file=tokenizer_vocab_path,
                                         split_char=" ",
                                         unk_token="[UNK]",
                                         params=tokenizer_params)
        self.max_seq_len = 512
        self.truncation_type = 0
        self.padding_id = 1 if tokenizer_name == "GptBpeTokenizer" else 0

        self.inference_type = model_dict.get("inference_type", None)

        # step 6: load the label map (index -> label)
        label_map_file = model_dict.get("label_map_path", None)
        self.label_map = {}
        if isinstance(label_map_file, str):
            label_map_file = _get_abs_path(label_map_file)
            with open(label_map_file, 'r', encoding='utf-8') as fr:
                for line in fr:
                    line = line.strip('\r\n')
                    items = line.split('\t')
                    idx, label = int(items[1]), items[0]
                    self.label_map[idx] = label
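Step 6 above reads the label map as tab-separated lines of "<label>\t<index>" and inverts them into an index-to-label dict. A small sketch of that parsing with illustrative sentiment labels (the real labels ship with the model package):

# Illustrative label_map contents, one "<label>\t<index>" pair per line.
sample = "negative\t0\npositive\t1\n"

label_map = {}
for line in sample.splitlines():
    label, idx = line.strip("\r\n").split("\t")
    label_map[int(idx)] = label

print(label_map)  # {0: 'negative', 1: 'positive'}
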
Example #5

def build_inference(params_dict, dataset_reader, model):
    """build trainer"""
    inference = Inference(param=params_dict,
                          data_set_reader=dataset_reader,
                          model_class=model)
    return inference


if __name__ == "__main__":
    args = args.build_common_arguments()
    log.init_log("./log/infer", level=logging.DEBUG)
    # prediction for the classification task
    param_dict = params.from_file(args.param_path)
    _params = params.replace_none(param_dict)

    register.import_modules()

    dataset_reader_params_dict = _params.get("dataset_reader")
    dataset_reader = dataset_reader_from_params(dataset_reader_params_dict)

    model_params_dict = _params.get("model")
    model = model_from_params(model_params_dict)

    inference_params_dict = _params.get("inference")
    inference = build_inference(inference_params_dict, dataset_reader, model)

    inference.do_inference()

    logging.info("os exit.")