Ejemplo n.º 1
0
def infer(topology, data_dir, model_path, word_dict_path, label_dict_path,
          batch_size):
    """
    Run a trained classifier over the test data and print its predictions.

    One tab-separated line is printed per test sample: the predicted label,
    the space-separated class probabilities (4 decimals) and the sample text
    rebuilt from its word ids.

    :param topology: callable invoked as
        ``topology(dict_dim, class_num, is_infer=True)``; its result is used
        as the output layer of the inferer
    :param data_dir: test data location handed to ``reader.test_reader``;
        when ``None`` the ``paddle.dataset.imdb`` dictionary and test set
        are used instead
    :param model_path: path of the gzip file the parameters are read from
    :type model_path: str
    :param word_dict_path: path of the word dictionary file (only read when
        ``data_dir`` is not ``None``)
    :type word_dict_path: str
    :param label_dict_path: path of the label dictionary file (only read
        when ``data_dir`` is not ``None``)
    :type label_dict_path: str
    :param batch_size: number of samples sent to the inferer per call
    :type batch_size: int
    """

    def _infer_a_batch(inferer, test_batch, ids_2_word, ids_2_label):
        """Infer one batch and print a result line for each of its samples."""
        probs = inferer.infer(input=test_batch, field=["value"])
        assert len(probs) == len(test_batch)
        for word_ids, prob in zip(test_batch, probs):
            # Each batch entry is a one-element list wrapping the word ids.
            word_text = " ".join(
                [ids_2_word[word_id] for word_id in word_ids[0]])
            print("%s\t%s\t%s" % (ids_2_label[prob.argmax()], " ".join(
                ["{:0.4f}".format(p) for p in prob]), word_text))

    logger.info("begin to predict...")
    use_default_data = (data_dir is None)

    if use_default_data:
        word_dict = paddle.dataset.imdb.word_dict()
        # ``items()`` is available on both Python 2 and Python 3 dicts,
        # unlike ``iteritems()``.
        word_reverse_dict = dict(
            (value, key) for key, value in word_dict.items())
        label_reverse_dict = {0: "positive", 1: "negative"}
        test_reader = paddle.dataset.imdb.test(word_dict)()
    else:
        assert os.path.exists(
            word_dict_path), "the word dictionary file does not exist"
        assert os.path.exists(
            label_dict_path), "the label dictionary file does not exist"

        word_dict = load_dict(word_dict_path)
        word_reverse_dict = load_reverse_dict(word_dict_path)
        label_reverse_dict = load_reverse_dict(label_dict_path)

        test_reader = reader.test_reader(data_dir, word_dict)()

    dict_dim = len(word_dict)
    class_num = len(label_reverse_dict)
    prob_layer = topology(dict_dim, class_num, is_infer=True)

    # initialize PaddlePaddle
    paddle.init(use_gpu=False, trainer_count=1)

    # load the trained models; the context manager closes the gzip handle
    # once the parameters have been read
    with gzip.open(model_path, "r") as param_file:
        parameters = paddle.parameters.Parameters.from_tar(param_file)
    inferer = paddle.inference.Inference(output_layer=prob_layer,
                                         parameters=parameters)

    test_batch = []
    for item in test_reader:
        test_batch.append([item[0]])
        if len(test_batch) == batch_size:
            _infer_a_batch(inferer, test_batch, word_reverse_dict,
                           label_reverse_dict)
            test_batch = []

    # flush the samples left over when the data size is not a multiple of
    # batch_size
    if test_batch:
        _infer_a_batch(inferer, test_batch, word_reverse_dict,
                       label_reverse_dict)
Ejemplo n.º 2
0
def infer(model_path, batch_size, test_data_file, vocab_file, target_file,
          use_gpu):
    """
    Decode the test data with the inference model stored under model_path.

    For every token a "word<TAB>gold tag<TAB>predicted tag" line is printed,
    and an empty line is printed after each sentence.

    return nothing
    """
    vocab = load_dict(vocab_file)
    id_to_word = load_reverse_dict(vocab_file)

    tag_dict = load_dict(target_file)
    id_to_tag = load_reverse_dict(target_file)

    sample_reader = reader.data_reader(test_data_file, vocab, tag_dict)
    batch_reader = paddle.batch(sample_reader, batch_size=batch_size)

    if use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    executor = fluid.Executor(place)

    # Run everything inside a dedicated scope.
    with fluid.scope_guard(fluid.Scope()):
        program, _feed_names, fetch_targets = fluid.io.load_inference_model(
            model_path, executor)

        for batch in batch_reader():
            feed = {
                "word": to_lodtensor([sample[0] for sample in batch], place),
                "mark": to_lodtensor([sample[1] for sample in batch], place),
            }
            decoded = executor.run(program,
                                   feed=feed,
                                   fetch_list=fetch_targets,
                                   return_numpy=False)[0]

            # First LoD level: offsets delimiting each sentence inside the
            # flattened decoding result.
            offsets = decoded.lod()[0]
            predicted = np.array(decoded)
            assert len(batch) == len(offsets) - 1

            for sent_idx, sentence in enumerate(batch):
                begin = offsets[sent_idx]
                end = offsets[sent_idx + 1]
                assert len(sentence[0]) == end - begin

                for pos, row in enumerate(six.moves.xrange(begin, end)):
                    token = id_to_word[sentence[0][pos]]
                    gold = id_to_tag[sentence[2][pos]]
                    guess = id_to_tag[predicted[row][0]]
                    print(token + "\t" + gold + "\t" + guess)
                print("")
Ejemplo n.º 3
0
    def __init__(self, inferer, word_dict_file, beam_size=1, max_gen_len=100):
        """
        constructor method.

        :param inferer: object of paddle.Inference that represents the entire
            network to forward compute the test batch
        :type inferer: paddle.Inference
        :param word_dict_file: path of word dictionary file
        :type word_dict_file: str
        :param beam_size: expansion width in each iteration
        :type beam_size: int
        :param max_gen_len: the maximum number of iterations
        :type max_gen_len: int
        """
        self.inferer = inferer
        self.beam_size = beam_size
        self.max_gen_len = max_gen_len
        self.ids_2_word = load_reverse_dict(word_dict_file)
        logger.info("dictionay len = %d" % (len(self.ids_2_word)))

        # Reverse-search the id -> word mapping for the two special tokens.
        # ``items()`` works on both Python 2 and Python 3 dicts, unlike
        # ``iteritems()``.
        try:
            self.eos_id = next(word_id
                               for word_id, word in self.ids_2_word.items()
                               if word == "<e>")
            self.unk_id = next(word_id
                               for word_id, word in self.ids_2_word.items()
                               if word == "<unk>")
        except StopIteration:
            # NOTE(review): this only logs; construction continues and
            # ``eos_id`` and/or ``unk_id`` stay unset when a token is
            # missing. The same message is emitted for a missing "<unk>".
            logger.fatal(("the word dictionay must contain an ending mark "
                          "in the text generation task."))

        self.candidate_paths = []
        self.final_paths = []
Ejemplo n.º 4
0
def infer(model_path, image_shape, label_dict_path, infer_file_list_path):
    """
    Run the trained model over every sample listed in a file list.

    All samples produced by the reader are collected into a single batch
    and handed, together with their labels, to ``infer_batch``.

    :param model_path: path of the gzip file the parameters are read from
    :param image_shape: image shape forwarded to ``DataGenerator`` and
        ``Model``
    :param label_dict_path: path of the label dictionary file
    :param infer_file_list_path: path passed to ``get_file_list`` to obtain
        the samples to infer
    """
    infer_file_list = get_file_list(infer_file_list_path)
    # Load the label dictionary.
    char_dict = load_dict(label_dict_path)
    # Load the reversed label dictionary.
    reversed_char_dict = load_reverse_dict(label_dict_path)
    # Size of the dictionary.
    dict_size = len(char_dict)
    # Build the reader.
    data_generator = DataGenerator(char_dict=char_dict,
                                   image_shape=image_shape)
    # Initialize PaddlePaddle.
    paddle.init(use_gpu=True, trainer_count=2)
    # Load the trained parameters; the context manager closes the gzip
    # handle once they have been read.
    with gzip.open(model_path) as param_file:
        parameters = paddle.parameters.Parameters.from_tar(param_file)
    # Build the network model.
    model = Model(dict_size, image_shape, is_infer=True)
    # Build the inferer.
    inferer = paddle.inference.Inference(output_layer=model.log_probs,
                                         parameters=parameters)
    # Start inference.
    test_batch = []
    labels = []
    for image, label in data_generator.infer_reader(infer_file_list)():
        # Each batch entry is a one-element list wrapping the image.
        test_batch.append([image])
        labels.append(label)
    infer_batch(inferer, test_batch, labels, reversed_char_dict)
Ejemplo n.º 5
0
def infer(model_path, batch_size, test_data_file, vocab_file, target_file):
    """
    Tag the test data with a trained NER model and print the result.

    For every token a "word<TAB>predicted label" line is printed, and
    ``print("\\n")`` is issued after each sample.

    :param model_path: path of the gzip file the parameters are read from
    :param batch_size: number of samples sent to the inferer per call
    :param test_data_file: test data location handed to
        ``reader.data_reader``
    :param vocab_file: path of the word dictionary file
    :param target_file: path of the label dictionary file
    """

    def _infer_a_batch(inferer, test_data, id_2_word, id_2_label):
        """Infer one batch and print the predicted label of every token."""
        probs = inferer.infer(input=test_data, field=["id"])
        # The result is flat: one predicted id per token of the whole batch.
        assert len(probs) == sum(len(x[0]) for x in test_data)

        # The offset into the flat result must carry over from one sample
        # to the next; resetting it per sample would pair every sample with
        # the predictions of the first one.
        start_id = 0
        for test_sample in test_data:
            sample_len = len(test_sample[0])
            for w, tag in zip(test_sample[0],
                              probs[start_id:start_id + sample_len]):
                print("%s\t%s" % (id_2_word[w], id_2_label[tag]))
            print("\n")
            start_id += sample_len

    word_dict = load_dict(vocab_file)
    word_dict_len = len(word_dict)
    word_reverse_dict = load_reverse_dict(vocab_file)

    label_dict = load_dict(target_file)
    label_reverse_dict = load_reverse_dict(target_file)
    label_dict_len = len(label_dict)

    # initialize PaddlePaddle
    paddle.init(use_gpu=False, trainer_count=1)
    # the context manager closes the gzip handle once the parameters have
    # been read
    with gzip.open(model_path, "r") as param_file:
        parameters = paddle.parameters.Parameters.from_tar(param_file)

    predict = ner_net(word_dict_len=word_dict_len,
                      label_dict_len=label_dict_len,
                      is_train=False)

    inferer = paddle.inference.Inference(output_layer=predict,
                                         parameters=parameters)

    test_data = []
    for item in reader.data_reader(test_data_file, word_dict, label_dict)():
        test_data.append([item[0], item[1]])
        if len(test_data) == batch_size:
            _infer_a_batch(inferer, test_data, word_reverse_dict,
                           label_reverse_dict)
            test_data = []

    # Flush the leftover samples; skip the call when nothing is left so the
    # inferer is never invoked on an empty batch.
    if test_data:
        _infer_a_batch(inferer, test_data, word_reverse_dict,
                       label_reverse_dict)
Ejemplo n.º 6
0
def infer(img_path, model_path, image_shape, label_dict_path):
    """
    Run the trained model on a single image.

    :param img_path: path of the image, loaded through
        ``Reader.load_image``
    :param model_path: path of the gzip file the parameters are read from
    :param image_shape: image shape forwarded to ``Reader`` and ``Model``
    :param label_dict_path: path of the label dictionary file
    :return: whatever ``start_infer`` returns for the one-image batch
    """
    # Load the label dictionary.
    char_dict = load_dict(label_dict_path)
    # Load the reversed label dictionary.
    reversed_char_dict = load_reverse_dict(label_dict_path)
    # Size of the dictionary.
    dict_size = len(char_dict)
    # Build the reader.
    my_reader = Reader(char_dict=char_dict, image_shape=image_shape)
    # Initialize PaddlePaddle.
    paddle.init(use_gpu=True, trainer_count=1)
    # Load the trained parameters; the context manager closes the gzip
    # handle once they have been read.
    with gzip.open(model_path) as param_file:
        parameters = paddle.parameters.Parameters.from_tar(param_file)
    # Build the network model.
    model = Model(dict_size, image_shape, is_infer=True)
    # Build the inferer.
    inferer = paddle.inference.Inference(output_layer=model.log_probs,
                                         parameters=parameters)
    # Load the data: a batch holding the single image.
    test_batch = [[my_reader.load_image(img_path)]]
    # Start inference.
    return start_infer(inferer, test_batch, reversed_char_dict)