def main(args: argparse.Namespace):
    """Run ``Toolbox.predict`` over every ``*.jpg`` in ``args.image_dir``.

    If ``args.annotation_dir`` is set, each image is paired with the ``.txt``
    file of the same base name in that directory and TP/FP/FN counts plus
    precision and recall are printed. Otherwise the images are only run
    through ``Toolbox.predict`` with no labels and no text output directory.

    Args:
        args: Parsed command line. Reads ``model``, ``image_dir``,
            ``output_img_dir``, ``output_txt_dir``, ``annotation_dir``,
            ``no_gpu`` and ``keys``.
    """
    model_path = args.model
    image_dir = args.image_dir
    output_img_dir = args.output_img_dir
    output_txt_dir = args.output_txt_dir

    # exist_ok=True replaces the racy "check, then create" sequence.
    for out_dir in (output_img_dir, output_txt_dir):
        if out_dir is not None:
            os.makedirs(out_dir, exist_ok=True)

    annotation_dir = args.annotation_dir
    with_image = bool(output_img_dir)  # whether predicted images are output
    with_gpu = bool(torch.cuda.is_available() and not args.no_gpu)  # whether to use the GPU

    model = load_model(model_path, with_gpu)
    # Loop-invariant: build the label converter once instead of once per image.
    label_converter = strLabelConverter(getattr(common_str, args.keys))

    if annotation_dir is not None:
        # Annotations are available, so accumulate the prediction metrics.
        true_pos = false_pos = false_neg = 0
        for image_fn in tqdm(image_dir.glob('*.jpg')):
            # Swapping .jpg for .txt gives the matching annotation file name.
            gt_path = annotation_dir / image_fn.with_suffix('.txt').name
            labels = load_annotation(gt_path)
            with torch.no_grad():
                # Predict one sample; images/text are saved by Toolbox.predict.
                _, _, res = Toolbox.predict(
                    image_fn, model, with_image, output_img_dir, with_gpu,
                    labels, output_txt_dir, label_converter)
            true_pos += res[0]
            false_pos += res[1]
            false_neg += res[2]

            # NOTE(review): the flattened source does not show whether this
            # report runs per image or once after the loop; per-image running
            # totals are assumed here -- confirm.
            predicted = true_pos + false_pos
            precision = true_pos / predicted if predicted > 0 else 0
            expected = true_pos + false_neg
            recall = true_pos / expected if expected > 0 else 0
            print("TP: %d, FP: %d, FN: %d, precision: %f, recall: %f" %
                  (true_pos, false_pos, false_neg, precision, recall))
    else:
        # No annotations: only produce and save the predicted images.
        with torch.no_grad():
            for image_fn in tqdm(image_dir.glob('*.jpg')):
                Toolbox.predict(
                    image_fn, model, with_image, output_img_dir, with_gpu,
                    None, None, label_converter)
def main(args: argparse.Namespace):
    """Run ``Toolbox.predict`` over every ``*.jpg`` in ``args.image_dir``.

    If ``args.annotation_dir`` is set, each image ``<stem>.jpg`` is paired
    with ``gt_<stem>.txt`` in that directory and TP/FP/FN counts plus
    precision and recall are printed. Otherwise the images are only run
    through ``Toolbox.predict`` without labels.

    Args:
        args: Parsed command line. Reads ``model``, ``image_dir``,
            ``output_img_dir``, ``output_txt_dir``, ``annotation_dir``,
            ``no_gpu`` and ``keys``.
    """
    model_path = args.model
    image_dir = args.image_dir
    output_img_dir = args.output_img_dir
    output_txt_dir = args.output_txt_dir

    # exist_ok=True replaces the racy "check, then create" sequence.
    for out_dir in (output_img_dir, output_txt_dir):
        if out_dir is not None:
            os.makedirs(out_dir, exist_ok=True)

    annotation_dir = args.annotation_dir
    with_image = bool(output_img_dir)
    with_gpu = bool(torch.cuda.is_available() and not args.no_gpu)

    model = load_model(model_path, with_gpu)
    # Loop-invariant: build the label converter once instead of once per image.
    label_converter = strLabelConverter(getattr(common_str, args.keys))

    if annotation_dir is not None:
        true_pos = false_pos = false_neg = 0
        for image_fn in tqdm(image_dir.glob('*.jpg')):
            # Build the name directly: with_name(...).with_suffix('.txt')
            # would cut a stem such as "a.b" down to "gt_a.txt".
            gt_path = annotation_dir / 'gt_{}.txt'.format(image_fn.stem)
            labels = load_annotation(gt_path)
            with torch.no_grad():
                _, _, res = Toolbox.predict(
                    image_fn, model, with_image, output_img_dir, with_gpu,
                    labels, output_txt_dir, label_converter)
            true_pos += res[0]
            false_pos += res[1]
            false_neg += res[2]

            # NOTE(review): the flattened source does not show whether this
            # report runs per image or once after the loop; per-image running
            # totals are assumed here -- confirm.
            predicted = true_pos + false_pos
            precision = true_pos / predicted if predicted > 0 else 0
            expected = true_pos + false_neg
            recall = true_pos / expected if expected > 0 else 0
            print("TP: %d, FP: %d, FN: %d, precision: %f, recall: %f" %
                  (true_pos, false_pos, false_neg, precision, recall))
    else:
        with torch.no_grad():
            for image_fn in tqdm(image_dir.glob('*.jpg')):
                Toolbox.predict(
                    image_fn, model, with_image, output_img_dir, with_gpu,
                    None, output_txt_dir, label_converter)
def main(args: argparse.Namespace):
    """Run ``Toolbox.predict`` (without labels) on every ``*.jpg`` image.

    Args:
        args: Parsed command line. Reads ``model``, ``image_dir``,
            ``output_img_dir`` and ``output_txt_dir``.
    """
    model_path = args.model
    image_dir = args.image_dir
    output_img_dir = args.output_img_dir
    output_txt_dir = args.output_txt_dir

    # An output directory may be omitted (with_image below already allows for
    # that); os.path.exists(None) / os.makedirs(None) would raise TypeError,
    # so only create the directories that were actually given.
    for out_dir in (output_img_dir, output_txt_dir):
        if out_dir is not None:
            os.makedirs(out_dir, exist_ok=True)

    with_image = bool(output_img_dir)
    with_gpu = bool(torch.cuda.is_available())

    model = load_model(model_path, with_gpu)

    for image_fn in image_dir.glob('*.jpg'):
        with torch.no_grad():
            # The returned (polys, im, res) triple is not used here.
            Toolbox.predict(image_fn, model, with_image, output_img_dir,
                            with_gpu, None, output_txt_dir)
def main(config, resume):
    """Build data loaders, model, loss and metrics from ``config`` and train.

    Args:
        config: Mapping read for ``data_loader.activate``, ``gpus``, ``arch``,
            ``loss`` and ``metrics``; it is also handed to the constructed
            objects.
        resume: Passed through to ``Trainer`` as ``resume``.

    Raises:
        NotImplementedError: If ``config['data_loader']['activate']`` is not 0.
            Previously such values fell through and failed later with an
            unbound ``train`` variable.
    """
    act = config['data_loader']['activate']
    if act != 0:
        # Only dataset 0 (ICDAR 2019 LSVT) is wired up; fail early and clearly.
        raise NotImplementedError(
            "unsupported data_loader 'activate' value: {!r}".format(act))

    logger = Logger()

    # ICDAR 2019 LSVT
    data_loader = ICDAR2019DataLoaderFactory(config)
    train = data_loader.train()
    val = data_loader.val()

    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
        str(i) for i in config['gpus'])

    # NOTE(security): eval() executes arbitrary expressions taken from the
    # config; only use configuration files from a trusted source.
    model = eval(config['arch'])(config)
    loss = eval(config['loss'])(config)
    metrics = [eval(metric) for metric in config['metrics']]

    trainer = Trainer(model,
                      loss,
                      metrics,
                      resume=resume,
                      config=config,
                      data_loader=train,
                      valid_data_loader=val,
                      train_logger=logger,
                      toolbox=Toolbox())
    trainer.train()
def _compute_boxes(_score_map, _geo_map):
    """Recover detection boxes from the score map and the geometry map.

    Per the original note, this is used at test time only, not for training.

    Args:
        _score_map: Tensor permuted with ``(0, 2, 3, 1)`` before use.
        _geo_map: Tensor permuted the same way.

    Returns:
        A ``(boxes, mapping)`` pair. ``boxes`` is the concatenation of every
        non-empty ``Toolbox.detect`` result and ``mapping`` holds, for each
        box, the index of the sample it came from. Either element is an empty
        list when nothing was detected.
    """
    # detach() + cpu() cuts the tensors out of the autograd graph, so nothing
    # below can be back-propagated through.
    score = _score_map.permute(0, 2, 3, 1).detach().cpu().numpy()
    geometry = _geo_map.permute(0, 2, 3, 1).detach().cpu().numpy()

    timer = {'net': 0, 'restore': 0, 'nms': 0}

    sample_index_chunks = []  # per sample: its index repeated once per box
    box_chunks = []  # every predicted detection box, grouped per sample

    for idx, sample_score in enumerate(score):
        found, _ = Toolbox.detect(score_map=sample_score[:, :, 0],
                                  geo_map=geometry[idx, :, :],
                                  timer=timer)
        if found is None:
            continue

        box_count = found.shape[0]
        if len(found) > 0:
            sample_index_chunks.append(np.array([idx] * box_count))
            box_chunks.append(found)

    all_boxes = np.concatenate(box_chunks) if box_chunks else []
    all_mapping = np.concatenate(sample_index_chunks) if sample_index_chunks else []
    return all_boxes, all_mapping
def main(args: argparse.Namespace):
    """Evaluate the model on every ``*.jpg`` image and print detection metrics.

    Each image ``<stem>.jpg`` is paired with ``gt_<stem>.txt`` in
    ``args.annotation_dir``. If the annotation cannot be loaded the image is
    still predicted, with ``labels=None``.

    Args:
        args: Parsed command line. Reads ``model``, ``image_dir``,
            ``output_img_dir``, ``output_txt_dir`` and ``annotation_dir``.
    """
    model_path = args.model
    image_dir = args.image_dir
    output_img_dir = args.output_img_dir
    output_txt_dir = args.output_txt_dir

    # An output directory may be omitted (with_image below already allows for
    # that); os.path.exists(None) / os.makedirs(None) would raise TypeError,
    # so only create the directories that were actually given.
    for out_dir in (output_img_dir, output_txt_dir):
        if out_dir is not None:
            os.makedirs(out_dir, exist_ok=True)

    annotation_dir = args.annotation_dir
    with_image = bool(output_img_dir)
    with_gpu = bool(torch.cuda.is_available())

    model = load_model(model_path, with_gpu)

    true_pos = false_pos = false_neg = 0

    for image_fn in image_dir.glob('*.jpg'):
        # Build the name directly: with_name(...).with_suffix('.txt') would
        # cut a stem such as "a.b" down to "gt_a.txt".
        gt_path = annotation_dir / 'gt_{}.txt'.format(image_fn.stem)
        try:
            labels = load_annotation(gt_path)
        except Exception:
            # Best effort: a missing or unreadable annotation must not stop
            # the run. Unlike a bare ``except``, Ctrl-C still gets through.
            labels = None

        with torch.no_grad():
            _, _, res = Toolbox.predict(image_fn, model, with_image,
                                        output_img_dir, with_gpu, labels,
                                        output_txt_dir)

        true_pos += res[0]
        false_pos += res[1]
        false_neg += res[2]

        # NOTE(review): the flattened source does not show whether this
        # report runs per image or once after the loop; per-image running
        # totals are assumed here -- confirm.
        predicted = true_pos + false_pos
        precision = true_pos / predicted if predicted > 0 else 0
        expected = true_pos + false_neg
        recall = true_pos / expected if expected > 0 else 0
        print("TP: %d, FP: %d, FN: %d, precision: %f, recall: %f" %
              (true_pos, false_pos, false_neg, precision, recall))
def main(args: argparse.Namespace):
    """Run ``Toolbox.predict`` on every entry of ``args.input_dir``.

    A failure on one entry prints its traceback and the loop moves on to the
    next entry.

    Args:
        args: Parsed command line. Reads ``model``, ``input_dir`` and
            ``output_dir``.
    """
    input_dir = args.input_dir
    output_dir = args.output_dir

    save_images = bool(output_dir)
    use_gpu = bool(torch.cuda.is_available())
    model = load_model(args.model, use_gpu)

    for entry_name in os.listdir(input_dir):
        try:
            with torch.no_grad():
                # Two values are expected back; neither is used afterwards.
                _polys, _image = Toolbox.predict(entry_name, input_dir, model,
                                                 save_images, output_dir,
                                                 use_gpu)
        except Exception:
            traceback.print_exc()
def call(self, input):
    """Run shared convolution, detection, RoI rotation and recognition.

    In training mode the given ``boxes``/``mapping`` feed the RoI rotation.
    Otherwise boxes are recovered from the predicted score/geometry maps with
    ``Toolbox.detect``.

    Args:
        input: An ``(image, boxes, mapping)`` triple.

    Returns:
        ``(score_map, geo_map, (preds, lengths), pred_boxes, pred_mapping,
        indices)``. Outside training, when no box is detected, the result is
        ``(score_map, geo_map, (None, None), [], [], None)``.
    """
    image, boxes, mapping = input
    feature_map = self.sharedConv(image)
    score_map, geo_map = self.detector(feature_map)

    if self.training:
        rois, lengths, indices = self.roirotate.call(
            feature_map, boxes[:, :8], mapping)
        pred_mapping = mapping
        pred_boxes = boxes
    else:
        score = score_map.permute(0, 2, 3, 1)
        geometry = geo_map.permute(0, 2, 3, 1)
        score = score.detach().cpu().numpy()
        geometry = geometry.detach().cpu().numpy()

        timer = {'net': 0, 'restore': 0, 'nms': 0}

        pred_boxes = []
        pred_mapping = []
        for i in range(score.shape[0]):
            bb, _ = Toolbox.detect(score_map=score[i, :, :, 0],
                                   geo_map=geometry[i, :, :],
                                   timer=timer)
            # The sibling _compute_boxes helper guards against detect()
            # returning None; do the same here instead of failing on
            # ``bb.shape``.
            if bb is None:
                continue
            bb_size = bb.shape[0]
            if len(bb) > 0:
                pred_mapping.append(np.array([i] * bb_size))
                pred_boxes.append(bb)

        if len(pred_mapping) > 0:
            pred_boxes = np.concatenate(pred_boxes)
            pred_mapping = np.concatenate(pred_mapping)
            rois, lengths, indices = self.roirotate.call(
                feature_map, pred_boxes[:, :8], pred_mapping)
        else:
            # Nothing detected: there is nothing to recognise.
            return score_map, geo_map, (
                None, None), pred_boxes, pred_mapping, None

    lengths = tf.convert_to_tensor(lengths)
    preds = self.recognizer(rois, lengths)
    preds = preds.permute(1, 0, 2)  # B, T, C -> T, B, C

    return score_map, geo_map, (preds, lengths), pred_boxes, pred_mapping, indices
def detect_and_recognize(self, request, context):
    """Detect and recognise text in a base64-encoded image.

    ``request.image`` is decoded, opened as an RGB image and passed to
    ``Toolbox.predict``. When polygons are returned, the second element of
    the entry whose polygon has the largest ``_area_by_shoelace`` value is
    reported with code 200; otherwise code 201 with a fixed message.

    Args:
        request: Object whose ``image`` attribute holds the base64 payload.
        context: Not used by this method.

    Returns:
        ``base_pb2.OCRResponse`` whose ``message`` is the JSON-encoded result.
    """
    raw_bytes = base64.b64decode(request.image)
    rgb_image = Image.open(BytesIO(raw_bytes)).convert('RGB')

    predictions, _, _ = Toolbox.predict(
        to_predict_img=rgb_image,
        model=self.model,
        with_img=False,
        output_dir=None,
        with_gpu=self.config['cuda'],
        output_txt_dir=None,
        labels=None,
        label_converter=self.label_converter)

    if predictions is None or len(predictions) == 0:
        code, result = 201, '未识别出'
    else:
        largest = max(predictions,
                      key=lambda item: self._area_by_shoelace(item[0]))
        code, result = 200, largest[1]

    payload = {'mode': 'detect_and_recognize', 'code': code, 'result': result}
    return base_pb2.OCRResponse(message=json.dumps(payload))
def post(self):
    """Download the image at ``img_url``, detect text boxes and write JSON.

    The response carries ``code`` (200 on success, -1 when the HTTP request
    fails, -2 on any other exception), ``detect_nums``, ``bounding_boxes``
    (left/top/height/width from ``get_bound_box``) and ``boxes`` (the four
    corner points of each box).
    """
    img_url = self.get_argument("img_url", default=None, strip=False)
    to_return = {}
    detected_boxes = []
    try:
        img_content = requests.get(img_url, timeout=5).content
        to_process_img = Image.open(BytesIO(img_content))
        detected_boxes, _, _ = Toolbox.predict(
            to_predict_img=to_process_img,
            model=model,
            with_img=False,
            output_dir=None,
            with_gpu=with_gpu,
            output_txt_dir=None,
            labels=None,
            label_converter=label_converter)
        to_return['code'] = 200
    except requests.exceptions.RequestException:
        to_return['code'] = -1
    except Exception:
        to_return['code'] = -2
    finally:
        # The sibling detect_and_recognize caller checks the first value of
        # Toolbox.predict for None; treat that as "no boxes" here as well
        # instead of failing on len(None).
        if detected_boxes is None:
            detected_boxes = []

        bounding_boxes = []
        corner_boxes = []
        for m_box, _ in detected_boxes:
            bounding_boxes.append(
                dict(zip(['left', 'top', 'height', 'width'],
                         get_bound_box(m_box.flatten()))))
            corner_boxes.append({
                m_name: m_value.tolist()
                for m_name, m_value in zip(
                    ['left_top', 'right_top', 'right_bottom', 'left_bottom'],
                    m_box)
            })

        to_return['detect_nums'] = len(detected_boxes)
        to_return['bounding_boxes'] = bounding_boxes
        to_return['boxes'] = corner_boxes
        self.write(json.dumps(to_return))
def _compute_boxes(_score_map, _geo_map):
    """Turn batched score/geometry maps into detection boxes.

    Args:
        _score_map: Tensor permuted with ``(0, 2, 3, 1)`` before use.
        _geo_map: Tensor permuted the same way.

    Returns:
        ``(boxes, mapping)``: the concatenated non-empty results of
        ``Toolbox.detect`` and, for each box, the index of the sample it
        belongs to. Either element is an empty list if nothing was detected.
    """
    score_np = _score_map.permute(0, 2, 3, 1)
    geo_np = _geo_map.permute(0, 2, 3, 1)
    score_np = score_np.detach().cpu().numpy()
    geo_np = geo_np.detach().cpu().numpy()

    timer = dict(net=0, restore=0, nms=0)
    sample_ids, box_groups = [], []

    batch_size = score_np.shape[0]
    for sample in range(batch_size):
        boxes, _ = Toolbox.detect(score_map=score_np[sample, :, :, 0],
                                  geo_map=geo_np[sample, :, :, ],
                                  timer=timer)
        if boxes is None:
            continue

        count = boxes.shape[0]
        if len(boxes) == 0:
            continue

        sample_ids.append(np.array([sample] * count))
        box_groups.append(boxes)

    merged_boxes = np.concatenate(box_groups) if len(box_groups) > 0 else []
    merged_ids = np.concatenate(sample_ids) if len(sample_ids) > 0 else []
    return merged_boxes, merged_ids