def main(args):
    """Run the matting predictor over every image under ``args.image_path``.

    Trimaps are optional; when ``args.trimap_path`` is given, one trimap per
    image is collected and forwarded to the predictor.
    """
    imgs_list, imgs_dir = get_image_list(args.image_path)

    trimaps_list = None
    if args.trimap_path is not None:
        trimaps_list, _ = get_image_list(args.trimap_path)

    predictor = Predictor(args)
    predictor.run(imgs=imgs_list, trimaps=trimaps_list, imgs_dir=imgs_dir)
def recognize(self, img, horizontal_list=None, free_list=None, reformat=True,
              imgH=32):
    """Run the text recognizer over an image.

    Args:
        img: input image (numpy array).
        horizontal_list: axis-aligned detection boxes, or None.
        free_list: free-form (quadrilateral) detection boxes, or None.
        reformat: when True, normalize the input via ``reformat_input``.
        imgH: model input height used when resizing the crop.

    Returns:
        Whatever ``get_text`` produces for the prepared crop list.
    """
    if reformat:
        # Bug fix: the original called reformat_input(img_cv_grey), but no
        # `img_cv_grey` exists in this scope (the parameter is `img`), so
        # every call with reformat=True raised NameError. Pass `img`.
        img, img_cv_grey = reformat_input(img)
    if (horizontal_list is None) and (free_list is None):
        # No detection boxes supplied: recognize the whole image as one line.
        # NOTE(review): unpacking three values assumes img.shape is
        # (channels/batch, height, width) -- confirm against callers.
        b, y_max, x_max = img.shape
        ratio = x_max / y_max
        max_width = int(imgH * ratio)
        # NOTE(review): Image.ANTIALIAS is a PIL constant handed to
        # cv2.resize as an interpolation flag -- verify this is intended.
        crop_img = cv2.resize(img, (max_width, imgH),
                              interpolation=Image.ANTIALIAS)
        image_list = [([[0, 0], [x_max, 0], [x_max, y_max], [0, y_max]],
                       crop_img)]
    else:
        image_list, max_width = get_image_list(horizontal_list, free_list,
                                               img, model_height=imgH)
    result = get_text(self.recognizer, image_list)
    return result
def predict(args, predictor):
    """Classify images with a Paddle inference predictor, or benchmark it.

    Normal mode: read each image (or take the in-memory image when serving
    through PaddleHub), run inference, and print the top-1 class/score.
    Benchmark mode (``args.enable_benchmark``): feed random tensors for
    ``test_num`` timed iterations after 10 warm-up iterations and print the
    average latency.

    Returns:
        (classes, scores) when ``args.hubserving`` is set; otherwise None.
    """
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_handle(input_names[0])
    output_names = predictor.get_output_names()
    output_tensor = predictor.get_output_handle(output_names[0])
    test_num = 500
    test_time = 0.0
    if not args.enable_benchmark:
        # for PaddleHubServing
        if args.hubserving:
            img_list = [args.image_file]
        # for predict only
        else:
            img_list = get_image_list(args.image_file)

        for idx, img_name in enumerate(img_list):
            if not args.hubserving:
                # Bug fix: the original indexed the result of cv2.imread
                # before checking for None, so an unreadable file raised
                # TypeError instead of reaching the assert. Validate first,
                # then convert BGR -> RGB.
                img = cv2.imread(img_name)
                assert img is not None, "Error in loading image: {}".format(
                    img_name)
                img = img[:, :, ::-1]
            else:
                img = img_name
            inputs = utils.preprocess(img, args)
            # Tile the single image into a full batch.
            inputs = np.expand_dims(
                inputs, axis=0).repeat(args.batch_size, axis=0).copy()
            input_tensor.copy_from_cpu(inputs)

            predictor.run()

            output = output_tensor.copy_to_cpu()
            classes, scores = utils.postprocess(output, args)
            if args.hubserving:
                return classes, scores
            print("Current image file: {}".format(img_name))
            print("\ttop-1 class: {0}".format(classes[0]))
            print("\ttop-1 score: {0}".format(scores[0]))
    else:
        # Benchmark path: random input, first 10 iterations are warm-up and
        # excluded from timing.
        for i in range(0, test_num + 10):
            inputs = np.random.rand(args.batch_size, 3, 224,
                                    224).astype(np.float32)
            start_time = time.time()
            input_tensor.copy_from_cpu(inputs)

            predictor.run()

            output = output_tensor.copy_to_cpu()
            output = output.flatten()
            if i >= 10:
                test_time += time.time() - start_time
            time.sleep(0.01)  # sleep for T4 GPU

        fp_message = "FP16" if args.use_fp16 else "FP32"
        trt_msg = "using tensorrt" if args.use_tensorrt \
            else "not using tensorrt"
        print("{0}\t{1}\t{2}\tbatch size: {3}\ttime(ms): {4}".format(
            args.model, trt_msg, fp_message, args.batch_size,
            1000 * test_time / test_num))
def main(args):
    """Validate the config, then run prediction over the requested images."""
    env_info = get_sys_env()
    use_gpu = env_info['Paddle compiled with cuda'] and env_info['GPUs used']
    paddle.set_device('gpu' if use_gpu else 'cpu')

    if not args.cfg:
        raise RuntimeError('No configuration file specified.')
    cfg = Config(args.cfg)

    val_dataset = cfg.val_dataset
    if val_dataset is None:
        raise RuntimeError(
            'The verification dataset is not specified in the configuration file.'
        )
    if len(val_dataset) == 0:
        raise ValueError(
            'The length of val_dataset is 0. Please check if your dataset is valid'
        )

    # Log the fully-resolved configuration for reproducibility.
    msg = '\n---------------Config Information---------------\n'
    msg += str(cfg)
    msg += '------------------------------------------------'
    logger.info(msg)

    model = cfg.model
    transforms = val_dataset.transforms
    image_list, image_dir = get_image_list(args.image_path)

    trimap_list = None
    if args.trimap_path is not None:
        trimap_list, _ = get_image_list(args.trimap_path)

    logger.info('Number of predict images = {}'.format(len(image_list)))

    predict(
        model,
        model_path=args.model_path,
        transforms=transforms,
        image_list=image_list,
        image_dir=image_dir,
        trimap_list=trimap_list,
        save_dir=args.save_dir)
def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,
              decoder = 'greedy', beamWidth= 5, batch_size = 1,
              workers = 0, allowlist = None, blocklist = None, detail = 1,
              paragraph = False,
              contrast_ths = 0.1,adjust_contrast = 0.5, filter_ths = 0.003,
              reformat=True):
    """Recognize text from a grayscale image using the loaded recognizer.

    When no detection boxes are supplied, the whole image is treated as a
    single text line; otherwise crops are built from the given boxes.
    With ``detail == 0`` only the text strings are returned, otherwise the
    full (box, text, confidence) tuples.
    """
    if reformat:
        # Normalize the input into (color image, grayscale image).
        img, img_cv_grey = reformat_input(img_cv_grey)

    if (horizontal_list == None) and (free_list == None):
        # No boxes: wrap the entire image in one full-frame box.
        y_max, x_max = img_cv_grey.shape
        ratio = x_max / y_max
        # NOTE(review): `imgH` is not defined in this function -- presumably
        # a module-level constant (model input height); confirm.
        max_width = int(imgH * ratio)
        # NOTE(review): Image.ANTIALIAS (a PIL constant) is passed to
        # cv2.resize as an interpolation flag -- verify intent.
        crop_img = cv2.resize(img_cv_grey, (max_width, imgH),
                              interpolation=Image.ANTIALIAS)
        image_list = [([[0, 0], [x_max, 0], [x_max, y_max], [0, y_max]],
                       crop_img)]
    else:
        image_list, max_width = get_image_list(horizontal_list, free_list,
                                               img_cv_grey,
                                               model_height=imgH)

    # Characters to suppress during decoding: an allowlist wins over a
    # blocklist; by default everything outside the language charset is
    # ignored.
    if allowlist:
        ignore_char = ''.join(set(self.character) - set(allowlist))
    elif blocklist:
        ignore_char = ''.join(set(blocklist))
    else:
        ignore_char = ''.join(set(self.character) - set(self.lang_char))

    # Beam search is not supported for these languages; force greedy.
    if self.model_lang in [
            'chinese_tra', 'chinese_sim', 'japanese', 'korean'
    ]:
        decoder = 'greedy'

    result = get_text(self.character, imgH, int(max_width), self.recognizer,
                      self.converter, image_list, ignore_char, decoder,
                      beamWidth, batch_size, contrast_ths, adjust_contrast,
                      filter_ths, workers, self.device)

    if self.model_lang == 'arabic':
        # Arabic is right-to-left; reshape text for correct display order.
        direction_mode = 'rtl'
        result = [list(item) for item in result]
        for item in result:
            item[1] = get_display(item[1])
    else:
        direction_mode = 'ltr'

    if paragraph:
        # Merge line-level results into paragraphs.
        result = get_paragraph(result, mode=direction_mode)

    if detail == 0:
        # Text-only output.
        return [item[1] for item in result]
    else:
        return result
def main(args):
    """Predict mattes for all input images, with optional TRT auto-tuning.

    When auto-tune applies, dynamic-shape info is collected from a small
    sample before inference, and the temporary shape file is removed after
    the run. A benchmark report is emitted when ``args.benchmark`` is set.
    """
    imgs_list, imgs_dir = get_image_list(args.image_path)

    trimaps_list = None
    if args.trimap_path is not None:
        trimaps_list, _ = get_image_list(args.trimap_path)

    if use_auto_tune(args):
        # Sample a handful of images to collect dynamic-shape information.
        tune_img_nums = 10
        auto_tune(args, imgs_list, tune_img_nums)

    predictor = Predictor(args)
    predictor.run(imgs=imgs_list, trimaps=trimaps_list, imgs_dir=imgs_dir)

    # The tuned-shape file is only a run-time artifact; clean it up.
    if use_auto_tune(args) and os.path.exists(args.auto_tuned_shape_file):
        os.remove(args.auto_tuned_shape_file)

    if args.benchmark:
        predictor.autolog.report()
def main():
    """Classify each input image with a dygraph model and print top-k."""
    args = utils.parse_args()
    # assign the place
    place = paddle.set_device('gpu' if args.use_gpu else 'cpu')

    net = architectures.__dict__[args.model](class_dim=args.class_num)
    load_dygraph_pretrain(net, args.pretrained_model,
                          args.load_static_weights)
    image_list = get_image_list(args.image_file)
    for idx, filename in enumerate(image_list):
        img = cv2.imread(filename)[:, :, ::-1]
        data = utils.preprocess(img, args)
        data = np.expand_dims(data, axis=0)
        data = paddle.to_tensor(data)
        net.eval()
        outputs = net(data)
        if args.model == "GoogLeNet":
            # GoogLeNet returns (logits, aux1, aux2); keep the main head.
            outputs = outputs[0]
        outputs = F.softmax(outputs)
        outputs = outputs.numpy()
        probs = postprocess(outputs)

        top1_class_id = 0
        print("Current image file: {}".format(filename))
        # probs is an iterable of (class_id, probability) pairs, best first.
        for rank, (class_id, prob) in enumerate(probs, start=1):
            print("\ttop{:d}, class id: {:d}, probability: {:.4f}".format(
                rank, class_id, prob))
            if rank == 1:
                top1_class_id = class_id

        if args.pre_label_image:
            save_prelabel_results(top1_class_id, filename,
                                  args.pre_label_out_idr)
    return
# NOTE(review): top-level fragment of a stereo-disparity script. `args` is
# parsed above this chunk, and the trailing create_matcher(...) call is cut
# off mid-argument-list -- the remaining arguments live past this view.
basedir = args.base_dir
input_dir = os.path.join(basedir, args.input_dir)
disp_dir = os.path.join(basedir, args.disp_dir)
# Ensure the disparity output directory exists.
if not os.path.exists(disp_dir):
    os.mkdir(disp_dir)
if args.save_undistort:
    # Output directories for the undistorted left/right frames.
    left_dir = os.path.join(basedir, 'left')
    right_dir = os.path.join(basedir, 'right')
    if not os.path.exists(left_dir):
        os.mkdir(left_dir)
    if not os.path.exists(right_dir):
        os.mkdir(right_dir)
img_list = utils.get_image_list(input_dir)
# Image size (width, height) from the first frame; halving the width
# suggests the inputs are side-by-side stereo pairs -- confirm.
img_size = cv2.imread(os.path.join(input_dir, img_list[0])).shape[:2][::-1]
img_size = (img_size[0] // 2, img_size[1])
roi = (0, 0, img_size[0], img_size[1])
min_disp = args.min_disp
if args.num_disp is None:
    # Default disparity count: about 1/8 of the image width, rounded up to
    # a multiple of 16 via `& -16`.
    num_disp = (img_size[0] // 8 + 15) & -16
else:
    num_disp = args.num_disp
matcher = StereoMatcher(args.calib, img_size)
matcher.create_matcher(min_disp, num_disp, args.blocksize, args.mode,
# NOTE(review): top-level fragment of a stereo-calibration script.
# `basedir`, `args`, `PATTERN_SIZE` and `find_chess_board_corners` are
# defined above this chunk, and the per-image loop continues past it.
left_dir = os.path.join(basedir, args.left_dir)
right_dir = os.path.join(basedir, args.right_dir)
debug_dir = os.path.join(basedir, args.debug_dir)
if args.debug and not os.path.exists(debug_dir):
    os.mkdir(debug_dir)
# Object-space chessboard corner grid, scaled to physical square size.
pattern_points = np.zeros((np.prod(PATTERN_SIZE), 3), np.float32)
pattern_points[:, :2] = np.indices(PATTERN_SIZE).T.reshape(-1, 2)
pattern_points *= args.square_size
# Accumulators for calibration correspondences (filled later in the loop).
obj_points = []
img_points_l = []
img_points_r = []
# Left/right images are expected to share file names -- confirm.
image_list = get_image_list(left_dir)
pre_img_l, pre_img_r = None, None
for i, fname in enumerate(image_list):
    print('[%d/%d] Processing % s ...' % (i + 1, len(image_list), fname))
    img_l = cv2.imread(os.path.join(left_dir, fname))
    img_r = cv2.imread(os.path.join(right_dir, fname))
    # Skip frames identical to the previous pair (duplicate captures).
    if np.array_equal(img_l, pre_img_l) and np.array_equal(
            img_r, pre_img_r):
        continue
    pre_img_l, pre_img_r = img_l.copy(), img_r.copy()
    corners_l = find_chess_board_corners(img_l, PATTERN_SIZE)
    corners_r = find_chess_board_corners(img_r, PATTERN_SIZE)
    image_size = img_l.shape[:2][::-1]
import os
import sys

import cv2

from utils import get_image_list, split_stereo_image

INPUT_DIR_NAME = 'rgb'
LEFT_DIR_NAME = 'left'
RIGHT_DIR_NAME = 'right'

if __name__ == '__main__':
    # Split each side-by-side stereo frame into left/right halves.
    basedir = sys.argv[1]
    input_dir = os.path.join(basedir, INPUT_DIR_NAME)
    left_dir = os.path.join(basedir, LEFT_DIR_NAME)
    right_dir = os.path.join(basedir, RIGHT_DIR_NAME)

    # Create the output directories on first run.
    for out_dir in (left_dir, right_dir):
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

    image_list = get_image_list(input_dir)
    total = len(image_list)
    for idx, fname in enumerate(image_list, start=1):
        print("[%d/%d] Spliting % s ..." % (idx, total, fname))
        frame = cv2.imread(os.path.join(input_dir, fname))
        im_l, im_r = split_stereo_image(frame)
        cv2.imwrite(os.path.join(left_dir, fname), im_l)
        cv2.imwrite(os.path.join(right_dir, fname), im_r)
def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,
              decoder = 'greedy', beamWidth= 5, batch_size = 1,
              workers = 0, allowlist = None, blocklist = None, detail = 1,
              rotation_info = None,
              paragraph = False,
              contrast_ths = 0.1,adjust_contrast = 0.5, filter_ths = 0.003,
              reformat=True):
    """Recognize text from a grayscale image, box by box or batched.

    Falls back to one full-frame box when no detection boxes are given.
    On CPU or with batch_size == 1 (and no rotation candidates) boxes are
    processed one at a time; otherwise all crops are batched together.
    With ``detail == 0`` only the text strings are returned.
    """
    if reformat:
        # Normalize the input into (color image, grayscale image).
        img, img_cv_grey = reformat_input(img_cv_grey)

    # Characters to suppress during decoding: an allowlist wins over a
    # blocklist; by default everything outside the language charset is
    # ignored.
    if allowlist:
        ignore_char = ''.join(set(self.character) - set(allowlist))
    elif blocklist:
        ignore_char = ''.join(set(blocklist))
    else:
        ignore_char = ''.join(set(self.character) - set(self.lang_char))

    # Beam search is not supported for Chinese models; force greedy.
    if self.model_lang in ['chinese_tra', 'chinese_sim']:
        decoder = 'greedy'

    if (horizontal_list == None) and (free_list == None):
        # No boxes: treat the whole image as one horizontal text region.
        y_max, x_max = img_cv_grey.shape
        horizontal_list = [[0, x_max, 0, y_max]]
        free_list = []

    # without gpu/parallelization, it is faster to process image one by one
    if ((batch_size == 1) or (self.device == 'cpu')) and not rotation_info:
        result = []
        for bbox in horizontal_list:
            h_list = [bbox]
            f_list = []
            # NOTE(review): `imgH` is not defined in this function --
            # presumably a module-level constant (model input height).
            image_list, max_width = get_image_list(h_list, f_list,
                                                   img_cv_grey,
                                                   model_height=imgH)
            result0 = get_text(self.character, imgH, int(max_width),
                               self.recognizer, self.converter, image_list,
                               ignore_char, decoder, beamWidth, batch_size,
                               contrast_ths, adjust_contrast, filter_ths,
                               workers, self.device)
            result += result0
        for bbox in free_list:
            h_list = []
            f_list = [bbox]
            image_list, max_width = get_image_list(h_list, f_list,
                                                   img_cv_grey,
                                                   model_height=imgH)
            result0 = get_text(self.character, imgH, int(max_width),
                               self.recognizer, self.converter, image_list,
                               ignore_char, decoder, beamWidth, batch_size,
                               contrast_ths, adjust_contrast, filter_ths,
                               workers, self.device)
            result += result0
    # default mode will try to process multiple boxes at the same time
    else:
        image_list, max_width = get_image_list(horizontal_list, free_list,
                                               img_cv_grey,
                                               model_height=imgH)
        image_len = len(image_list)
        if rotation_info and image_list:
            # Append rotated copies of each crop so every rotation
            # candidate is scored.
            image_list = make_rotated_img_list(rotation_info, image_list)
            max_width = max(max_width, imgH)

        result = get_text(self.character, imgH, int(max_width),
                          self.recognizer, self.converter, image_list,
                          ignore_char, decoder, beamWidth, batch_size,
                          contrast_ths, adjust_contrast, filter_ths,
                          workers, self.device)

        if rotation_info and (horizontal_list + free_list):
            # Keep, per original box, the rotation with best confidence.
            result = set_result_with_confidence(result, image_len)

    if self.model_lang == 'arabic':
        # Arabic is right-to-left; reshape text for correct display order.
        direction_mode = 'rtl'
        result = [list(item) for item in result]
        for item in result:
            item[1] = get_display(item[1])
    else:
        direction_mode = 'ltr'

    if paragraph:
        # Merge line-level results into paragraphs.
        result = get_paragraph(result, mode=direction_mode)

    if detail == 0:
        # Text-only output.
        return [item[1] for item in result]
    else:
        return result
import os
import argparse

import cv2

import utils

parser = argparse.ArgumentParser()
parser.add_argument('image_dir',
                    metavar='IMAGE_DIR',
                    help='the directory of images')
parser.add_argument('--fps',
                    '-f',
                    type=int,
                    default=20,
                    help='the video playback speed')
args = parser.parse_args()

if __name__ == '__main__':
    # Play the images in the directory back as a video at the given fps.
    img_list = utils.get_image_list(args.image_dir)
    total = len(img_list)
    frame_delay_ms = int(1000 / args.fps)
    for frame_no, fname in enumerate(img_list, start=1):
        print('Frame [%d/%d] % s ...' % (frame_no, total, fname))
        frame = cv2.imread(os.path.join(args.image_dir, fname))
        cv2.imshow('image', frame)
        cv2.waitKey(frame_delay_ms)
    # Block until a key press after the last frame.
    cv2.waitKey(0)