def main():
    """Recognize the text in one test image, then export the inference model.

    Builds the recognition network named in the loaded config, optionally
    loads pretrained weights, runs one forward pass on the image found at
    ``config['test_img_path']``, decodes the prediction, saves the inference
    model under ``./output/`` and prints the raw indices and decoded text.
    """
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_num'] = char_ops.get_char_num()

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    if use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    executor = fluid.Executor(place)

    build_model = create_module(config['Architecture']['function'])
    rec_model = build_model(params=config)

    startup_program = fluid.Program()
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, startup_program), \
            fluid.unique_name.guard():
        eval_outputs = rec_model(mode="test")
        fetch_names = [var.name for var in eval_outputs]
    infer_program = infer_program.clone(for_test=True)
    executor.run(startup_program)

    weights_path = config['Global']['pretrain_weights']
    if weights_path is not None:
        fluid.load(infer_program, weights_path)

    test_img_path = config['test_img_path']
    image_shape = config['Global']['image_shape']
    blobs = test_reader(image_shape, test_img_path)
    predict = executor.run(
        program=infer_program,
        feed={"image": blobs},
        fetch_list=fetch_names,
        return_numpy=False)

    preds = np.array(predict[0])
    if preds.shape[1] == 1:
        # Single-column output: flatten the indices and decode them directly.
        preds = preds.reshape(-1)
        preds_lod = predict[0].lod()[0]  # read but not used afterwards
        preds_text = char_ops.decode(preds)
    else:
        # Multi-column output: drop the first column and, when the value 1
        # occurs at least twice in the first row, cut at its second
        # occurrence (presumably the end-of-sequence marker; confirm against
        # CharacterOps).
        end_pos = np.where(preds[0, :] == 1)[0]
        stop = None if len(end_pos) <= 1 else end_pos[1]
        preds_text = char_ops.decode(preds[0, 1:stop].reshape(-1))

    fluid.io.save_inference_model(
        "./output/",
        feeded_var_names=['image'],
        target_vars=eval_outputs,
        executor=executor,
        main_program=infer_program,
        model_filename="model",
        params_filename="params")
    print(preds)
    print(preds_text)
class TextRecognizer(object):
    """Recognize text in cropped text-line images with a Paddle predictor.

    Images are processed in batches ordered by aspect ratio. Each batch is
    preprocessed according to the configured algorithm, run through the
    predictor, and decoded into ``[text, score]`` pairs that are written
    back at the position of the corresponding input image.
    """

    # Decoding scheme used for each recognition algorithm handled here.
    _LOSS_TYPES = {
        "CRNN": "ctc",
        "Rosetta": "ctc",
        "STAR-Net": "ctc",
        "RARE": "attention",
        "SRN": "srn",
    }

    def __init__(self, args):
        """Create the predictor (unless served remotely) and the decoder.

        Args:
            args: Parsed options. The attributes read are ``use_pdserving``,
                ``use_zero_copy_run``, ``rec_image_shape`` (comma-separated
                "C,H,W"), ``rec_char_type``, ``rec_batch_num``,
                ``rec_algorithm``, ``max_text_length``,
                ``rec_char_dict_path`` and ``use_space_char``.
        """
        if args.use_pdserving is False:
            self.predictor, self.input_tensor, self.output_tensors = \
                utility.create_predictor(args, mode="rec")
            self.use_zero_copy_run = args.use_zero_copy_run
        self.rec_image_shape = [
            int(v) for v in args.rec_image_shape.split(",")
        ]
        self.character_type = args.rec_char_type
        self.rec_batch_num = args.rec_batch_num
        self.rec_algorithm = args.rec_algorithm
        self.text_len = args.max_text_length
        char_ops_params = {
            "character_type": args.rec_char_type,
            "character_dict_path": args.rec_char_dict_path,
            "use_space_char": args.use_space_char,
            "max_text_length": args.max_text_length
        }
        # As before, an algorithm outside the table leaves ``loss_type``
        # unset on both the params and the instance.
        loss_type = self._LOSS_TYPES.get(self.rec_algorithm)
        if loss_type is not None:
            char_ops_params['loss_type'] = loss_type
            self.loss_type = loss_type
        self.char_ops = CharacterOps(char_ops_params)

    def resize_norm_img(self, img, max_wh_ratio):
        """Resize to the model height, normalize and right-pad with zeros.

        Args:
            img: H x W x C image whose channel count equals the configured
                one (asserted).
            max_wh_ratio: Largest width/height ratio in the current batch.

        Returns:
            float32 array of shape (C, imgH, imgW), channel-first. Pixel
            values are divided by 255, then shifted by -0.5 and divided by
            0.5.
        """
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
        wh_ratio = max(max_wh_ratio, imgW * 1.0 / imgH)
        if self.character_type == "ch":
            # NOTE(review): 32 is hard-coded here rather than imgH; confirm
            # this is intended when the configured height differs from 32.
            imgW = int((32 * wh_ratio))
        h, w = img.shape[:2]
        ratio = w / float(h)
        # Keep the aspect ratio, but never exceed the padded width.
        resized_w = min(imgW, int(math.ceil(imgH * ratio)))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def resize_norm_img_srn(self, img, image_shape):
        """Resize an image for SRN, convert to gray and pad to full width.

        The target width is ``imgH`` times 1, 2 or 3 depending on the
        image's aspect ratio, or ``imgW`` for wider images.

        Args:
            img: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
            image_shape: (C, H, W) of the model input.

        Returns:
            float32 array of shape (1, imgH, imgW).
        """
        imgC, imgH, imgW = image_shape
        im_hei = img.shape[0]
        im_wid = img.shape[1]
        for multiple in (1, 2, 3):
            if im_wid <= im_hei * multiple:
                target_w = imgH * multiple
                break
        else:
            target_w = imgW
        # Never exceed the canvas width; otherwise the paste below would
        # fail with a shape mismatch whenever imgH * multiple > imgW.
        target_w = min(target_w, imgW)
        img_new = cv2.resize(img, (target_w, imgH))
        img_np = cv2.cvtColor(np.asarray(img_new), cv2.COLOR_BGR2GRAY)
        img_black = np.zeros((imgH, imgW))
        img_black[:, 0:img_np.shape[1]] = img_np
        return np.reshape(img_black, (1, imgH, imgW)).astype(np.float32)

    def srn_other_inputs(self, image_shape, num_heads, max_text_length,
                         char_num):
        """Build the position indices and attention biases fed to SRN.

        Args:
            image_shape: (C, H, W) of the model input.
            num_heads: Number of times each bias is tiled along axis 1.
            max_text_length: Side length of the square bias matrices.
            char_num: Unused; kept for interface compatibility.

        Returns:
            ``[encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
            gsrm_slf_attn_bias2]`` with shapes ``(1, H/8 * W/8, 1)``,
            ``(1, max_text_length, 1)`` and twice
            ``(1, num_heads, max_text_length, max_text_length)``. The
            position arrays are int64. ``bias1`` holds -1e9 strictly above
            the diagonal and ``bias2`` holds -1e9 strictly below it.
        """
        imgC, imgH, imgW = image_shape
        feature_dim = int((imgH / 8) * (imgW / 8))

        encoder_word_pos = np.arange(feature_dim).reshape(
            (feature_dim, 1)).astype('int64')
        gsrm_word_pos = np.arange(max_text_length).reshape(
            (max_text_length, 1)).astype('int64')

        ones = np.ones((1, max_text_length, max_text_length))
        bias_shape = [-1, 1, max_text_length, max_text_length]
        tiling = [1, num_heads, 1, 1]
        # Multiplying by a one-element list (not a scalar) is kept from the
        # original: it promotes the result to float64.
        gsrm_slf_attn_bias1 = np.tile(
            np.triu(ones, 1).reshape(bias_shape),
            tiling).astype('float32') * [-1e9]
        gsrm_slf_attn_bias2 = np.tile(
            np.tril(ones, -1).reshape(bias_shape),
            tiling).astype('float32') * [-1e9]

        return [
            encoder_word_pos[np.newaxis, :], gsrm_word_pos[np.newaxis, :],
            gsrm_slf_attn_bias1, gsrm_slf_attn_bias2
        ]

    def process_image_srn(self,
                          img,
                          image_shape,
                          num_heads,
                          max_text_length,
                          char_ops=None):
        """Preprocess one image and build its auxiliary SRN inputs.

        Args:
            img: Input image, see ``resize_norm_img_srn``.
            image_shape: (C, H, W) of the model input.
            num_heads: Forwarded to ``srn_other_inputs``.
            max_text_length: Forwarded to ``srn_other_inputs``.
            char_ops: Character helper; defaults to ``self.char_ops``.

        Returns:
            Tuple ``(norm_img, encoder_word_pos, gsrm_word_pos,
            gsrm_slf_attn_bias1, gsrm_slf_attn_bias2)``. ``norm_img`` has a
            leading batch axis of 1 and both biases are float32.
        """
        if char_ops is None:
            # The declared default used to crash on ``None.get_char_num()``.
            char_ops = self.char_ops
        norm_img = self.resize_norm_img_srn(img, image_shape)
        norm_img = norm_img[np.newaxis, :]
        char_num = char_ops.get_char_num()

        encoder_word_pos, gsrm_word_pos, bias1, bias2 = \
            self.srn_other_inputs(image_shape, num_heads, max_text_length,
                                  char_num)
        return (norm_img, encoder_word_pos, gsrm_word_pos,
                bias1.astype(np.float32), bias2.astype(np.float32))

    def _run_plain(self, batch_imgs):
        """Preprocess and run a CTC/attention batch; return the start time."""
        max_wh_ratio = 0
        for img in batch_imgs:
            h, w = img.shape[0:2]
            max_wh_ratio = max(max_wh_ratio, w * 1.0 / h)
        norm_img_batch = np.concatenate(
            [
                self.resize_norm_img(img, max_wh_ratio)[np.newaxis, :]
                for img in batch_imgs
            ],
            axis=0).copy()

        starttime = time.time()
        if self.use_zero_copy_run:
            self.input_tensor.copy_from_cpu(norm_img_batch)
            self.predictor.zero_copy_run()
        else:
            self.predictor.run([fluid.core.PaddleTensor(norm_img_batch)])
        return starttime

    def _run_srn(self, batch_imgs):
        """Preprocess and run an SRN batch; return the start time."""
        norm_img_batch = []
        for img in batch_imgs:
            # num_heads=8 and max_text_length=25 are fixed at this call site.
            sample = self.process_image_srn(img, self.rec_image_shape, 8, 25,
                                            self.char_ops)
            norm_img_batch.append(sample[0])
            # NOTE(review): as in the original, only the auxiliary inputs of
            # the LAST image in the batch are kept, so they have batch size 1
            # while the image tensor has the full batch size. Confirm the
            # exported model accepts this before changing it.
            encoder_word_pos, gsrm_word_pos, bias1, bias2 = sample[1:]
        norm_img_batch = np.concatenate(norm_img_batch, axis=0).copy()
        encoder_word_pos = np.concatenate([encoder_word_pos])
        gsrm_word_pos = np.concatenate([gsrm_word_pos])
        bias1 = np.concatenate([bias1])
        bias2 = np.concatenate([bias2])

        starttime = time.time()
        # Input order: image, encoder positions, both biases, GSRM positions.
        inputs = [
            fluid.core.PaddleTensor(array)
            for array in (norm_img_batch, encoder_word_pos, bias1, bias2,
                          gsrm_word_pos)
        ]
        self.predictor.run(inputs)
        return starttime

    def _decode_ctc(self, rec_idx_batch, rec_idx_lod, predict_batch,
                    predict_lod):
        """Decode a CTC batch into ``(offset, [text, score])`` pairs."""
        decoded = []
        for rno in range(len(rec_idx_lod) - 1):
            indices_slice = rec_idx_batch[rec_idx_lod[rno]:
                                          rec_idx_lod[rno + 1], 0]
            preds_text = self.char_ops.decode(indices_slice)
            probs = predict_batch[predict_lod[rno]:predict_lod[rno + 1], :]
            ind = np.argmax(probs, axis=1)
            # Steps whose best class is the last one (treated as the CTC
            # blank here) do not contribute to the score.
            blank = probs.shape[1]
            valid_ind = np.where(ind != (blank - 1))[0]
            if len(valid_ind) == 0:
                continue
            score = np.mean(probs[valid_ind, ind[valid_ind]])
            decoded.append((rno, [preds_text, score]))
        return decoded

    def _decode_srn(self, rec_idx_batch, probs):
        """Decode an SRN batch into ``(offset, [text, score])`` pairs."""
        char_num = self.char_ops.get_char_num()
        total_preds = rec_idx_batch.reshape(-1)
        text_len = self.text_len
        decoded = []
        for ino in range(int(len(rec_idx_batch) / text_len)):
            start = ino * text_len
            stop = start + text_len
            preds = total_preds[start:stop]
            # Score with THIS sample's rows. Indexing the whole-batch
            # ``probs`` with per-sample positions scored every sample from
            # the first sample's probabilities.
            sample_probs = probs[start:stop]
            valid_ind = np.where(preds != int(char_num - 1))[0]
            if len(valid_ind) == 0:
                continue
            score = np.mean(np.max(sample_probs[valid_ind], axis=1))
            preds = preds[:valid_ind[-1] + 1]
            decoded.append((ino, [self.char_ops.decode(preds), score]))
        return decoded

    def _decode_attention(self, rec_idx_batch, predict_batch):
        """Decode an attention batch into ``(offset, [text, score])`` pairs."""
        decoded = []
        for rno in range(len(rec_idx_batch)):
            # Skip column 0 and cut at the second occurrence of index 1
            # (presumably the end marker; confirm against CharacterOps).
            end_pos = np.where(rec_idx_batch[rno, :] == 1)[0]
            stop = None if len(end_pos) <= 1 else end_pos[1]
            preds = rec_idx_batch[rno, 1:stop]
            step_scores = predict_batch[rno, 1:stop]
            # An empty span would otherwise produce a NaN mean.
            score = np.mean(step_scores) if step_scores.size else 0.0
            decoded.append((rno, [self.char_ops.decode(preds), score]))
        return decoded

    def __call__(self, img_list):
        """Recognize every image in ``img_list``.

        Args:
            img_list: Sequence of images exposing ``shape`` as (H, W, ...).

        Returns:
            ``(rec_res, predict_time)``. ``rec_res[i]`` is ``[text, score]``
            for ``img_list[i]`` and stays ``['', 0.0]`` when nothing was
            decoded for it. ``predict_time`` is the accumulated time from
            just before the predictor runs until its outputs are copied
            back, summed over batches.
        """
        img_num = len(img_list)
        # Calculate the aspect ratio of all text bars
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))

        # One independent placeholder per image; ``[[...]] * n`` would make
        # every slot alias the same list object.
        rec_res = [['', 0.0] for _ in range(img_num)]
        batch_num = self.rec_batch_num
        predict_time = 0
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            batch_imgs = [
                img_list[indices[ino]]
                for ino in range(beg_img_no, end_img_no)
            ]
            if self.loss_type == "srn":
                starttime = self._run_srn(batch_imgs)
            else:
                starttime = self._run_plain(batch_imgs)

            if self.loss_type == "ctc":
                rec_idx_batch = self.output_tensors[0].copy_to_cpu()
                rec_idx_lod = self.output_tensors[0].lod()[0]
                predict_batch = self.output_tensors[1].copy_to_cpu()
                predict_lod = self.output_tensors[1].lod()[0]
                predict_time += time.time() - starttime
                decoded = self._decode_ctc(rec_idx_batch, rec_idx_lod,
                                           predict_batch, predict_lod)
            elif self.loss_type == 'srn':
                rec_idx_batch = self.output_tensors[0].copy_to_cpu()
                probs = self.output_tensors[1].copy_to_cpu()
                predict_time += time.time() - starttime
                decoded = self._decode_srn(rec_idx_batch, probs)
            else:
                rec_idx_batch = self.output_tensors[0].copy_to_cpu()
                predict_batch = self.output_tensors[1].copy_to_cpu()
                predict_time += time.time() - starttime
                decoded = self._decode_attention(rec_idx_batch,
                                                 predict_batch)

            # Undo the aspect-ratio sort when storing the results.
            for offset, result in decoded:
                rec_res[indices[beg_img_no + offset]] = result

        return rec_res, predict_time
def main():
    """Run recognition on every image under ``infer_img`` and export the model.

    Builds the network in test mode, initializes it through ``init_model``,
    then feeds one reader sample per listed image file. The prediction is
    decoded according to ``config['Global']['loss_type']`` ("ctc",
    "attention" or "srn") and the indices, text and score are logged.
    Finally the inference model is saved under ``./output/``.
    """
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)
    char_ops = CharacterOps(config['Global'])
    loss_type = config['Global']['loss_type']
    config['Global']['char_ops'] = char_ops

    # NOTE: the check_gpu(use_gpu) call is disabled in this script.
    use_gpu = config['Global']['use_gpu']
    if use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    build_model = create_module(config['Architecture']['function'])
    rec_model = build_model(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog), \
            fluid.unique_name.guard():
        _, outputs = rec_model(mode="test")
        fetch_name_list = list(outputs.keys())
        fetch_varname_list = [outputs[name].name for name in fetch_name_list]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    init_model(config, eval_prog, exe)

    blobs = reader_main(config, 'test')()
    infer_img = config['Global']['infer_img']
    infer_list = get_image_file_list(infer_img)
    if len(infer_list) == 0:
        logger.info("Can not find img in infer_img dir.")

    for image_path in infer_list:
        logger.info("infer_img:%s" % image_path)
        img = next(blobs)

        if loss_type == "srn":
            # The sample carries the image plus four auxiliary SRN inputs.
            feed = {
                'image': img[0],
                'encoder_word_pos': np.concatenate(
                    [img[1]], axis=0).astype(np.int64),
                'gsrm_word_pos': np.concatenate(
                    [img[2]], axis=0).astype(np.int64),
                'gsrm_slf_attn_bias1': np.concatenate(
                    [img[3]], axis=0).astype(np.float32),
                'gsrm_slf_attn_bias2': np.concatenate(
                    [img[4]], axis=0).astype(np.float32),
            }
        else:
            feed = {"image": img}
        predict = exe.run(
            program=eval_prog,
            feed=feed,
            fetch_list=fetch_varname_list,
            return_numpy=False)

        if loss_type == "ctc":
            preds = np.array(predict[0]).reshape(-1)
            preds_lod = predict[0].lod()[0]  # read but not used afterwards
            preds_text = char_ops.decode(preds)
            probs = np.array(predict[1])
            ind = np.argmax(probs, axis=1)
            # Ignore steps whose best class is the last one (treated as the
            # CTC blank here).
            blank = probs.shape[1]
            valid_ind = np.where(ind != (blank - 1))[0]
            if len(valid_ind) == 0:
                continue
            score = np.mean(probs[valid_ind, ind[valid_ind]])
        elif loss_type == "attention":
            preds = np.array(predict[0])
            probs = np.array(predict[1])
            # Skip column 0 and cut at the second occurrence of index 1
            # (presumably the end marker; confirm against CharacterOps).
            end_pos = np.where(preds[0, :] == 1)[0]
            stop = None if len(end_pos) <= 1 else end_pos[1]
            score = np.mean(probs[0, 1:stop])
            preds = preds[0, 1:stop].reshape(-1)
            preds_text = char_ops.decode(preds)
        elif loss_type == "srn":
            char_num = char_ops.get_char_num()
            preds = np.array(predict[0]).reshape(-1)
            probs = np.array(predict[1])
            ind = np.argmax(probs, axis=1)
            # Positions equal to char_num - 1 are excluded from the result.
            valid_ind = np.where(preds != int(char_num - 1))[0]
            if len(valid_ind) == 0:
                continue
            score = np.mean(probs[valid_ind, ind[valid_ind]])
            preds = preds[:valid_ind[-1] + 1]
            preds_text = char_ops.decode(preds)

        logger.info("\t index: {}".format(preds))
        logger.info("\t word : {}".format(preds_text))
        logger.info("\t score: {}".format(score))

    # save for inference model
    target_var = list(outputs.values())
    fluid.io.save_inference_model(
        "./output/",
        feeded_var_names=['image'],
        target_vars=target_var,
        executor=exe,
        main_program=eval_prog,
        model_filename="model",
        params_filename="params")
class TextRecognizer(object):
    """Recognize text in cropped text-line images (CTC or attention).

    Images are processed in batches ordered by aspect ratio, run through a
    zero-copy Paddle predictor and decoded into ``[text, score]`` pairs that
    are written back at the position of the corresponding input image.
    """

    def __init__(self, args):
        """Create the predictor and the character decoder.

        Args:
            args: Parsed options. The attributes read are
                ``rec_image_shape`` (comma-separated "C,H,W"),
                ``rec_char_type``, ``rec_batch_num``, ``rec_algorithm``,
                ``rec_char_dict_path`` and ``use_space_char``.
        """
        self.predictor, self.input_tensor, self.output_tensors = \
            utility.create_predictor(args, mode="rec")
        self.rec_image_shape = [
            int(v) for v in args.rec_image_shape.split(",")
        ]
        self.character_type = args.rec_char_type
        self.rec_batch_num = args.rec_batch_num
        self.rec_algorithm = args.rec_algorithm
        # Every algorithm other than RARE is decoded as CTC.
        self.loss_type = 'attention' if self.rec_algorithm == "RARE" else 'ctc'
        char_ops_params = {
            "character_type": args.rec_char_type,
            "character_dict_path": args.rec_char_dict_path,
            "use_space_char": args.use_space_char,
            "loss_type": self.loss_type,
        }
        self.char_ops = CharacterOps(char_ops_params)

    def resize_norm_img(self, img, max_wh_ratio):
        """Resize to the model height, normalize and right-pad with zeros.

        Args:
            img: H x W x C image whose channel count equals the configured
                one (asserted).
            max_wh_ratio: Largest width/height ratio in the current batch.

        Returns:
            float32 array of shape (C, imgH, imgW), channel-first. Pixel
            values are divided by 255, then shifted by -0.5 and divided by
            0.5.
        """
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
        if self.character_type == "ch":
            # NOTE(review): 32 is hard-coded here rather than imgH; confirm
            # this is intended when the configured height differs from 32.
            imgW = int((32 * max_wh_ratio))
        h, w = img.shape[:2]
        ratio = w / float(h)
        # Keep the aspect ratio, but never exceed the padded width.
        resized_w = min(imgW, int(math.ceil(imgH * ratio)))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def __call__(self, img_list):
        """Recognize every image in ``img_list``.

        Args:
            img_list: Sequence of images exposing ``shape`` as (H, W, ...).

        Returns:
            ``(rec_res, predict_time)``. ``rec_res[i]`` is ``[text, score]``
            for ``img_list[i]`` and stays ``['', 0.0]`` when a CTC
            prediction contains only blank steps. ``predict_time`` is the
            accumulated time from just before the predictor runs until its
            outputs are copied back, summed over batches.
        """
        img_num = len(img_list)
        # Calculate the aspect ratio of all text bars
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))

        # One independent placeholder per image; ``[[...]] * n`` would make
        # every slot alias the same list object.
        rec_res = [['', 0.0] for _ in range(img_num)]
        batch_num = self.rec_batch_num
        predict_time = 0
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            batch_imgs = [
                img_list[indices[ino]]
                for ino in range(beg_img_no, end_img_no)
            ]
            max_wh_ratio = 0
            for img in batch_imgs:
                h, w = img.shape[0:2]
                max_wh_ratio = max(max_wh_ratio, w * 1.0 / h)
            norm_img_batch = np.concatenate([
                self.resize_norm_img(img, max_wh_ratio)[np.newaxis, :]
                for img in batch_imgs
            ]).copy()

            starttime = time.time()
            self.input_tensor.copy_from_cpu(norm_img_batch)
            self.predictor.zero_copy_run()

            if self.loss_type == "ctc":
                rec_idx_batch = self.output_tensors[0].copy_to_cpu()
                rec_idx_lod = self.output_tensors[0].lod()[0]
                predict_batch = self.output_tensors[1].copy_to_cpu()
                predict_lod = self.output_tensors[1].lod()[0]
                predict_time += time.time() - starttime
                for rno in range(len(rec_idx_lod) - 1):
                    beg = rec_idx_lod[rno]
                    end = rec_idx_lod[rno + 1]
                    preds_text = self.char_ops.decode(
                        rec_idx_batch[beg:end, 0])
                    probs = predict_batch[predict_lod[rno]:
                                          predict_lod[rno + 1], :]
                    ind = np.argmax(probs, axis=1)
                    # Steps whose best class is the last one (treated as the
                    # CTC blank here) do not contribute to the score.
                    blank = probs.shape[1]
                    valid_ind = np.where(ind != (blank - 1))[0]
                    # Check BEFORE averaging: np.mean on an empty selection
                    # emits a RuntimeWarning and yields NaN.
                    if len(valid_ind) == 0:
                        continue
                    score = np.mean(probs[valid_ind, ind[valid_ind]])
                    rec_res[indices[beg_img_no + rno]] = [preds_text, score]
            else:
                rec_idx_batch = self.output_tensors[0].copy_to_cpu()
                predict_batch = self.output_tensors[1].copy_to_cpu()
                predict_time += time.time() - starttime
                for rno in range(len(rec_idx_batch)):
                    # Skip column 0 and cut at the second occurrence of
                    # index 1 (presumably the end marker; confirm against
                    # CharacterOps).
                    end_pos = np.where(rec_idx_batch[rno, :] == 1)[0]
                    stop = None if len(end_pos) <= 1 else end_pos[1]
                    preds = rec_idx_batch[rno, 1:stop]
                    step_scores = predict_batch[rno, 1:stop]
                    # An empty span would otherwise produce a NaN mean.
                    score = np.mean(step_scores) if step_scores.size else 0.0
                    preds_text = self.char_ops.decode(preds)
                    rec_res[indices[beg_img_no + rno]] = [preds_text, score]

        return rec_res, predict_time
class TextRecognizer(object):
    """Recognize text in cropped text-line images with CTC decoding.

    Images are processed in fixed-size batches in input order, run through a
    zero-copy Paddle predictor and decoded into ``[text, score]`` pairs.
    """

    def __init__(self, args):
        """Create the predictor and the character decoder.

        Args:
            args: Parsed options. The attributes read are
                ``rec_image_shape`` (comma-separated "C,H,W"),
                ``rec_char_type`` and ``rec_char_dict_path``.
        """
        self.predictor, self.input_tensor, self.output_tensors = \
            utility.create_predictor(args, mode="rec")
        self.rec_image_shape = [
            int(v) for v in args.rec_image_shape.split(",")
        ]
        self.character_type = args.rec_char_type
        char_ops_params = {
            "character_type": args.rec_char_type,
            "character_dict_path": args.rec_char_dict_path,
            "loss_type": 'ctc',
        }
        self.char_ops = CharacterOps(char_ops_params)

    def resize_norm_img(self, img, max_wh_ratio):
        """Resize to the model height, normalize and right-pad with zeros.

        Args:
            img: H x W x C image.
            max_wh_ratio: Largest width/height ratio in the current batch.

        Returns:
            float32 array of shape (C, imgH, imgW), channel-first. Pixel
            values are divided by 255, then shifted by -0.5 and divided by
            0.5.
        """
        imgC, imgH, imgW = self.rec_image_shape
        if self.character_type == "ch":
            # NOTE(review): 32 is hard-coded here rather than imgH; confirm
            # this is intended when the configured height differs from 32.
            imgW = int(32 * max_wh_ratio)
        h, w = img.shape[0], img.shape[1]
        ratio = w / float(h)
        # Keep the aspect ratio, but never exceed the padded width.
        resized_w = min(imgW, int(math.ceil(imgH * ratio)))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def __call__(self, img_list):
        """Recognize every image in ``img_list``.

        Args:
            img_list: Sequence of images exposing ``shape`` as (H, W, ...).

        Returns:
            ``(rec_res, predict_time)``. ``rec_res`` holds one
            ``[text, score]`` entry per decoded sequence, in input order.
            The score is ``0.0`` when every step of a sequence is blank.
            ``predict_time`` is the accumulated time from just before the
            predictor runs until its outputs are copied back, summed over
            batches.
        """
        img_num = len(img_list)
        batch_num = 30  # fixed batch size
        rec_res = []
        predict_time = 0
        for beg_img_no in range(0, img_num, batch_num):
            batch_imgs = img_list[beg_img_no:beg_img_no + batch_num]
            max_wh_ratio = 0
            for img in batch_imgs:
                h, w = img.shape[0:2]
                max_wh_ratio = max(max_wh_ratio, w * 1.0 / h)
            norm_img_batch = np.concatenate([
                self.resize_norm_img(img, max_wh_ratio)[np.newaxis, :]
                for img in batch_imgs
            ]).copy()

            starttime = time.time()
            self.input_tensor.copy_from_cpu(norm_img_batch)
            self.predictor.zero_copy_run()
            rec_idx_batch = self.output_tensors[0].copy_to_cpu()
            rec_idx_lod = self.output_tensors[0].lod()[0]
            predict_batch = self.output_tensors[1].copy_to_cpu()
            predict_lod = self.output_tensors[1].lod()[0]
            predict_time += time.time() - starttime

            for rno in range(len(rec_idx_lod) - 1):
                beg = rec_idx_lod[rno]
                end = rec_idx_lod[rno + 1]
                preds_text = self.char_ops.decode(rec_idx_batch[beg:end, 0])
                probs = predict_batch[predict_lod[rno]:
                                      predict_lod[rno + 1], :]
                ind = np.argmax(probs, axis=1)
                # Steps whose best class is the last one (treated as the CTC
                # blank here) do not contribute to the score.
                blank = probs.shape[1]
                valid_ind = np.where(ind != (blank - 1))[0]
                if len(valid_ind) == 0:
                    # np.mean on an empty selection would store NaN (with a
                    # RuntimeWarning). Keep the entry so results stay aligned
                    # with the inputs, but give it a well-defined score.
                    score = 0.0
                else:
                    score = np.mean(probs[valid_ind, ind[valid_ind]])
                rec_res.append([preds_text, score])

        return rec_res, predict_time
def main():
    """Run recognition on one reader batch and export the inference model.

    Builds the network in test mode, initializes it through ``init_model``,
    takes the first item produced by the test reader and runs the model on
    each image in it, printing the raw indices and the decoded text.
    Finally the inference model is saved under ``./output/``.
    """
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_ops'] = char_ops

    # NOTE: the check_gpu(use_gpu) call is disabled in this script.
    use_gpu = config['Global']['use_gpu']
    if use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    build_model = create_module(config['Architecture']['function'])
    rec_model = build_model(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog), \
            fluid.unique_name.guard():
        _, outputs = rec_model(mode="test")
        fetch_name_list = list(outputs.keys())
        fetch_varname_list = [outputs[name].name for name in fetch_name_list]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    init_model(config, eval_prog, exe)

    blobs = reader_main(config, 'test')
    imgs = next(blobs())
    for img in imgs:
        predict = exe.run(
            program=eval_prog,
            feed={"image": img},
            fetch_list=fetch_varname_list,
            return_numpy=False)

        preds = np.array(predict[0])
        if preds.shape[1] == 1:
            # Single-column output: flatten the indices and decode directly.
            preds = preds.reshape(-1)
            preds_lod = predict[0].lod()[0]  # read but not used afterwards
            preds_text = char_ops.decode(preds)
        else:
            # Multi-column output: skip column 0 and cut at the second
            # occurrence of index 1 (presumably the end marker; confirm
            # against CharacterOps).
            end_pos = np.where(preds[0, :] == 1)[0]
            stop = None if len(end_pos) <= 1 else end_pos[1]
            preds_text = char_ops.decode(preds[0, 1:stop].reshape(-1))

        print(preds)
        print(preds_text)

    # save for inference model
    target_var = list(outputs.values())
    fluid.io.save_inference_model(
        "./output/",
        feeded_var_names=['image'],
        target_vars=target_var,
        executor=exe,
        main_program=eval_prog,
        model_filename="model",
        params_filename="params")