def decode(preds, scale):
    """Turn stacked PSENet kernel predictions into a label map and boxes.

    preds[-1] is treated as the full-text map; all maps are binarized at 1,
    masked by the text region, and grown with progressive scale expansion.
    Components smaller than 800/scale^2 pixels or with mean sigmoid score
    below 0.93 are discarded.

    Returns:
        (pred, boxes): the integer label map and an array of 4-corner boxes.
    """
    prob = torch.sigmoid(preds[-1])
    # sign() maps to {-1, 1}; shift/scale to a {0, 1} binarization
    binarized = (torch.sign(preds - 1) + 1) / 2
    text_region = binarized[-1]
    kernel_maps = binarized * text_region
    prob = prob.detach().cpu().numpy().astype(np.float32)
    kernel_maps = kernel_maps.detach().cpu().numpy()
    pred, label_values = pse(kernel_maps.astype(np.uint8), 5 / (scale * scale))
    min_pixels = 800 / (scale * scale)
    bbox_list = []
    for lv in label_values:
        # component pixels as (x, y) pairs for OpenCV
        pts = np.array(np.where(pred == lv)).transpose((1, 0))[:, ::-1]
        if pts.shape[0] < min_pixels:
            continue
        if np.mean(prob[pred == lv]) < 0.93:
            continue
        corners = cv2.boxPoints(cv2.minAreaRect(pts))
        # rotate corner order to start from index 1
        bbox_list.append([corners[1], corners[2], corners[3], corners[0]])
    return pred, np.array(bbox_list)
def detect(seg_maps, min_area_thresh=10, seg_map_thresh=0.9, ratio=1):
    """Detect text boxes from score map and geo map

    Args:
        seg_maps: 6 segmentation maps from network.
        min_area_thresh: min area to be detected.
        seg_map_thresh: segmentation threshlod.
        ratio: segmentation ratio.
    Returns:
        boxes: detected text boxes.
    """
    # Binarize the kernels walking the channel axis from largest to
    # smallest (sequence 0->n corresponds to max -> min kernel).
    binarized_kernels = []
    ones = np.ones_like(seg_maps[..., 0], dtype=np.uint8)
    zeros = np.zeros_like(seg_maps[..., 0], dtype=np.uint8)
    current_thresh = seg_map_thresh
    for idx in range(seg_maps.shape[-1] - 1, -1, -1):
        binarized_kernels.append(
            np.where(seg_maps[..., idx] > current_thresh, ones, zeros))
        # every kernel after the first uses the relaxed threshold
        current_thresh = seg_map_thresh * ratio
    mask_res, label_values = pse(binarized_kernels, min_area_thresh)
    # upscale the label map 4x; nearest keeps integer labels intact
    upscaled = cv2.resize(mask_res,
                          (mask_res.shape[1] * 4, mask_res.shape[0] * 4),
                          interpolation=cv2.INTER_NEAREST)
    boxes = []
    for value in label_values:
        # argwhere yields (y, x); swap to (x, y) for OpenCV
        component = np.argwhere(upscaled == value)[:, (1, 0)]
        boxes.append(cv2.boxPoints(cv2.minAreaRect(component)))
    return np.array(boxes)
def detect(seg_maps, timer, image_w, image_h, min_area_thresh=10,
           seg_map_thresh=0.9, ratio=1):
    '''
    restore text boxes from score map and geo map
    :param seg_maps: stacked kernel score maps, (1, H, W, K) or (H, W, K)
    :param timer: dict; the 'pse' key is filled with the PSE run time
    :param min_area_thresh: min connected-component area kept by pse()
    :param seg_map_thresh: threshhold for seg map
    :param ratio: compute each seg map thresh
    :return: (boxes, kernals, timer)
    '''
    if len(seg_maps.shape) == 4:
        seg_maps = seg_maps[0, :, :, ]  # drop batch dimension
    # get kernals, sequence: 0->n, max -> min
    kernals = []
    one = np.ones_like(seg_maps[..., 0], dtype=np.uint8)
    zero = np.zeros_like(seg_maps[..., 0], dtype=np.uint8)
    thresh = seg_map_thresh
    for i in range(seg_maps.shape[-1] - 1, -1, -1):
        kernal = np.where(seg_maps[..., i] > thresh, one, zero)
        kernals.append(kernal)
        # every kernel after the first uses the relaxed threshold
        thresh = seg_map_thresh * ratio
    start = time.time()
    mask_res, label_values = pse(kernals, min_area_thresh)
    timer['pse'] = time.time() - start
    mask_res = np.array(mask_res)
    # nearest-neighbour keeps the integer component labels intact
    mask_res_resized = cv2.resize(mask_res, (image_w, image_h),
                                  interpolation=cv2.INTER_NEAREST)
    boxes = []
    for label_value in label_values:
        # (y,x)
        points = np.argwhere(mask_res_resized == label_value)
        points = points[:, (1, 0)]  # swap to (x, y) for OpenCV
        rect = cv2.minAreaRect(points)
        box = cv2.boxPoints(rect)
        # print("box(no sorted): ", box)
        # lexicographically sort the 4 corners, then swap elements 1/2/3 —
        # presumably to restore a consistent winding order; verify against
        # downstream consumers of the box ordering
        box = box.tolist()
        box = sorted(box)
        temp = box[3]
        box[3] = box[1]
        box[1] = box[2]
        box[2] = temp
        box = np.array(box)
        # print("box(sorted): ", box)
        boxes.append(box)
    boxes = merge(np.array(boxes))  # project helper; merges/shrinks boxes
    # boxes were shrunk by merge(); kernals are intentionally left untouched
    return boxes, kernals, timer
def detect_pse(seg_maps, threshold=0.5, threshold_k=0.55, boxes_thres=0.01):
    """ poster with pse

    Post-process batched PSENet segmentation maps with progressive scale
    expansion.

    Args:
        seg_maps: batched kernel maps; only batch element 0 is used and
            seg_maps[0, 0] is treated as the full-text map.
        threshold: binarization threshold for the full-text mask.
        threshold_k: threshold applied to the masked kernel maps.
        boxes_thres: minimum score for a box to be kept.

    Returns:
        (bboxes, scores) as produced by mask_to_boxes_pse.

    Note: the dead local ``image_size = seg_maps.shape[2:]`` from the
    previous revision was removed — it was computed after slicing to 3-D
    (so it held a 1-tuple, not (h, w)) and was never read.
    """
    seg_maps = seg_maps[0, :, :, :]  # drop batch dimension
    # binary mask from the full-text (first) map
    mask = np.where(seg_maps[0, :, :] > threshold, 1., 0.)
    # mask every kernel map, then re-binarize with the kernel threshold
    seg_maps = (seg_maps * mask > threshold_k)
    result_map = pse(seg_maps, 5)  # min component area: 5 px
    bboxes, scores = mask_to_boxes_pse(result_map, seg_maps[0, :, :],
                                       min_score=boxes_thres)
    return bboxes, scores
def inference(self, model, image):
    """Run PSENet on one image and build the PSE label map.

    Args:
        model: PSENet-style network returning stacked kernel score maps.
        image: raw input image; resized/normalized by _preprocess_image.

    NOTE(review): no return statement is visible in this view — the
    function appears truncated after computing label_num; confirm against
    the full file.
    """
    model.eval()
    # _preprocess_image also returns the resize scale factor
    image_preprocessed, scale = self._preprocess_image(image)
    image_preprocessed = image_preprocessed.to(self.device)
    with torch.no_grad():
        outputs = model(image_preprocessed)
        # channel 0 is the full-text map; its sigmoid is the confidence
        score = torch.sigmoid(outputs[:, 0, ...])
        # binarize all maps at binary_th: sign() -> {-1, 1} -> {0, 1}
        outputs = (torch.sign(outputs - self.args.binary_th) + 1) / 2
        text = outputs[:, 0, ...]
        # kernels masked by the text region
        kernels = outputs[:, 0:self.args.kernel_num, ...] * text
        score = score.data.cpu().numpy()[0].astype(np.float32)
        text = text.data.cpu().numpy()[0].astype(np.uint8)
        kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)
        # progressive scale expansion; min area scaled by network stride
        pred = pse(
            kernels,
            self.args.min_kernel_area / (self.args.scale * self.args.scale))
        label_num = np.max(pred) + 1  # components + background
def decode(preds, threshold=0.5):
    """Decode PSENet outputs into a label map and rotated boxes.

    Args:
        preds: stacked kernel probability maps; preds[-1] is treated as
            the full-text map (assumed layout — TODO confirm with caller).
        threshold: binarization threshold for the text mask.

    Returns:
        (pred, bboxes): the PSE label map and an array of 4-corner box
        lists (empty when nothing is found).
    """
    # mask all kernels by the binarized full-text map
    mask = (preds[-1] > threshold).detach().float()
    preds = (preds * mask).detach().cpu().numpy()
    pred, label_num = pse(preds >= threshold, 100)
    h, w = pred.shape[-2:]
    bbox_list = []
    for label_idx in range(1, label_num + 1):
        result = (pred == label_idx).astype(np.uint8)
        # FIX: cv2.findContours returns (image, contours, hierarchy) on
        # OpenCV 3 but (contours, hierarchy) on OpenCV 4, so the original
        # 3-way unpacking crashed on OpenCV 4; [-2] is correct on both.
        contours = cv2.findContours(result, cv2.RETR_EXTERNAL,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]
        for contour in contours:
            rect = cv2.minAreaRect(contour)
            point = cv2.boxPoints(rect)
            # clamp corners to the label-map bounds
            point[:, 0] = np.clip(point[:, 0], 0, w - 1)
            point[:, 1] = np.clip(point[:, 1], 0, h - 1)
            bbox_list.append([point[1], point[2], point[3], point[0]])
    return pred, np.array(bbox_list)
def detect(seg_maps, image_w, image_h, min_area_thresh=10, seg_map_thresh=0.9,
           ratio=1):
    '''
    restore text boxes from score map and geo map
    :param seg_maps: stacked kernel score maps, (1, H, W, K) or (H, W, K)
    :param image_w: width the label map is resized to
    :param image_h: height the label map is resized to
    :param min_area_thresh: min connected-component area kept by pse()
    :param seg_map_thresh: threshhold for seg map
    :param ratio: compute each seg map thresh
    :return: (boxes array, list of binarized kernels)
    '''
    if len(seg_maps.shape) == 4:
        seg_maps = seg_maps[0, :, :, ]  # drop batch dimension
    # binarize the kernels from largest to smallest (0->n == max->min)
    kernals = []
    ones = np.ones_like(seg_maps[..., 0], dtype=np.uint8)
    zeros = np.zeros_like(seg_maps[..., 0], dtype=np.uint8)
    current_thresh = seg_map_thresh
    for idx in range(seg_maps.shape[-1] - 1, -1, -1):
        kernals.append(
            np.where(seg_maps[..., idx] > current_thresh, ones, zeros))
        # kernels after the first share the relaxed threshold
        current_thresh = seg_map_thresh * ratio
    mask_res, label_values = pse(kernals, min_area_thresh)
    # nearest-neighbour keeps the integer component labels intact
    resized_labels = cv2.resize(np.array(mask_res), (image_w, image_h),
                                interpolation=cv2.INTER_NEAREST)
    boxes = []
    for value in label_values:
        # argwhere yields (y, x); swap to (x, y) for OpenCV
        component = np.argwhere(resized_labels == value)[:, (1, 0)]
        boxes.append(cv2.boxPoints(cv2.minAreaRect(component)))
    return np.array(boxes), kernals
def run_PSENet(args, model, img, org_shape, out_type='rect',
               return_score=False):
    """Run PSENet on a preprocessed tensor and extract text regions.

    Args:
        args: namespace providing binary_th, kernel_num, min_kernel_area,
            scale, min_area and min_score.
        model: PSENet network returning stacked kernel maps.
        img: input tensor, already resized/normalized for the network.
        org_shape: (height, width) of the original image; used to map
            predictions back to original coordinates.
        out_type: 'rect' (axis-aligned box), 'rbox' (rotated box) or
            'contour' (raw component outline).
        return_score: when True, also return the raw score map.

    Returns:
        list of {'type': out_type, 'bbox': flat int32 array} dicts,
        optionally followed by the score map.
    """
    outputs = model(img)
    # channel 0 (full-text map): sigmoid used as confidence
    score = torch.sigmoid(outputs[:, 0, :, :])
    # binarize all maps: sign() -> {-1, 1} -> {0, 1}
    outputs = (torch.sign(outputs - args.binary_th) + 1) / 2
    text = outputs[:, 0, :, :]
    # kernels masked by the full-text region
    kernels = outputs[:, 0:args.kernel_num, :, :] * text
    score = score.data.cpu().numpy()[0].astype(np.float32)
    kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)
    # c++ version pse
    pred = pse(kernels, args.min_kernel_area / (args.scale * args.scale))
    # python version pse
    # pred = pypse(kernels, args.min_kernel_area / (args.scale * args.scale))
    # scale = (org_img.shape[0] * 1.0 / pred.shape[0], org_img.shape[1] * 1.0 / pred.shape[1])
    # (x_ratio, y_ratio) mapping prediction coords back to the original
    scale = (org_shape[1] * 1.0 / pred.shape[1],
             org_shape[0] * 1.0 / pred.shape[0])
    label = pred
    label_num = np.max(label) + 1  # components + background
    bboxes = []
    for i in range(1, label_num):
        # pixels of component i as (x, y)
        points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]
        if points.shape[0] < args.min_area / (args.scale * args.scale):
            continue
        score_i = np.mean(score[label == i])
        if score_i < args.min_score:
            continue
        if out_type == 'rect':
            # axis-aligned bounding rect, expanded into 4 corners
            rect = cv2.boundingRect(points)
            x1, y1 = rect[0], rect[1]
            x2, y2 = x1 + rect[2] - 1, y1 + rect[3] - 1
            pts = [x1, y1, x2, y1, x2, y2, x1, y2]
            bbox = np.array(pts).reshape(-1, 2) * scale
            bbox = bbox.astype('int32')
        elif out_type == 'rbox':
            rect = cv2.minAreaRect(points)
            bbox = cv2.boxPoints(rect) * scale
            bbox = bbox.astype('int32')
        elif out_type == 'contour':
            binary = np.zeros(label.shape, dtype='uint8')
            binary[label == i] = 1
            # [-2] selects contours on both OpenCV 3 (3-tuple) and 4 (2-tuple)
            ret = cv2.findContours(binary, cv2.RETR_TREE,
                                   cv2.CHAIN_APPROX_SIMPLE)
            # print(ret)
            # _, contours, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            contours = ret[-2]
            contour = contours[0]
            # epsilon = 0.01 * cv2.arcLength(contour, True)
            # bbox = cv2.approxPolyDP(contour, epsilon, True)
            bbox = contour
            if bbox.shape[0] <= 2:
                continue  # degenerate outline
            bbox = bbox * scale
            bbox = bbox.astype('int32')
        bboxes.append({'type': out_type, 'bbox': bbox.reshape(-1)})
    if return_score:
        return bboxes, score
    return bboxes
def test(args, file=None):
    """Run PSENet + CRNN recognition over an invoice test set.

    Builds the backbone selected by args.arch, loads the checkpoint at
    args.resume, runs PSE post-processing per image, writes box files
    under outputs/submit_invoice/, then feeds the rotated rects to
    crnnRec for recognition and returns the formatted result of the
    LAST processed image (result is overwritten each iteration).

    NOTE(review): the local name `slice` shadows the builtin, and the bare
    `except:` around load_state_dict swallows all errors — left unchanged
    here (documentation-only pass).
    """
    result = []
    data_loader = DataLoader(long_size=args.long_size, file=file)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)
    # mobilenet outputs its text map on the last channel; others on 0
    slice = 0
    # Setup Model
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=7,
                                scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=7,
                                 scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=7,
                                 scale=args.scale)
    elif args.arch == "mobilenet":
        model = models.Mobilenet(pretrained=True, num_classes=6,
                                 scale=args.scale)
        slice = -1
    for param in model.parameters():
        param.requires_grad = False  # inference only
    # model = model.cuda()
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            # model.load_state_dict(checkpoint['state_dict'])
            # key[7:] presumably strips the 'module.' prefix left by
            # DataParallel checkpoints — verify for non-DataParallel saves
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            try:
                model.load_state_dict(d)
            except:
                # fall back to the unstripped keys
                model.load_state_dict(checkpoint['state_dict'])
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()
    model.eval()
    total_frame = 0.0
    total_time = 0.0
    for idx, (org_img, img) in enumerate(test_loader):
        print('progress: %d / %d' % (idx, len(test_loader)))
        sys.stdout.flush()
        # img = Variable(img.cuda(), volatile=True)
        org_img = org_img.numpy().astype('uint8')[0]
        text_box = org_img.copy()
        # torch.cuda.synchronize()
        start = time.time()
        # angle detection
        # org_img, angle = detect_angle(org_img)
        outputs = model(img)
        # selected channel is the full-text map; sigmoid = confidence
        score = torch.sigmoid(outputs[:, slice, :, :])
        # binarize: sign() -> {-1, 1} -> {0, 1}
        outputs = (torch.sign(outputs - args.binary_th) + 1) / 2
        text = outputs[:, slice, :, :]
        kernels = outputs
        # kernels = outputs[:, 0:args.kernel_num, :, :] * text
        score = score.data.cpu().numpy()[0].astype(np.float32)
        text = text.data.cpu().numpy()[0].astype(np.uint8)
        kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)
        if args.arch == 'mobilenet':
            pred = pse2(kernels,
                        args.min_kernel_area / (args.scale * args.scale))
        else:
            # c++ version pse
            pred = pse(kernels,
                       args.min_kernel_area / (args.scale * args.scale))
        # python version pse
        # pred = pypse(kernels, args.min_kernel_area / (args.scale * args.scale))
        # scale = (org_img.shape[0] * 1.0 / pred.shape[0], org_img.shape[1] * 1.0 / pred.shape[1])
        # (x_ratio, y_ratio) mapping prediction coords back to org_img
        scale = (org_img.shape[1] * 1.0 / pred.shape[1],
                 org_img.shape[0] * 1.0 / pred.shape[0])
        label = pred
        label_num = np.max(label) + 1  # components + background
        bboxes = []
        rects = []
        for i in range(1, label_num):
            # pixels of component i as (x, y)
            points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]
            if points.shape[0] < args.min_area / (args.scale * args.scale):
                continue
            score_i = np.mean(score[label == i])
            if score_i < args.min_score:
                continue
            rect = cv2.minAreaRect(points)
            bbox = cv2.boxPoints(rect) * scale
            bbox = bbox.astype('int32')
            bbox = order_point(bbox)  # project helper: canonical corner order
            # bbox = np.array([bbox[1], bbox[2], bbox[3], bbox[0]])
            bboxes.append(bbox.reshape(-1))
            # rotated-rect parameters (angle, h, w, cx, cy) scaled back to
            # original coordinates, consumed by crnnRec below
            rec = []
            rec.append(rect[-1])
            rec.append(rect[1][1] * scale[1])
            rec.append(rect[1][0] * scale[0])
            rec.append(rect[0][0] * scale[0])
            rec.append(rect[0][1] * scale[1])
            rects.append(rec)
        # torch.cuda.synchronize()
        end = time.time()
        total_frame += 1
        total_time += (end - start)
        print('fps: %.2f' % (total_frame / total_time))
        sys.stdout.flush()
        for bbox in bboxes:
            cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1,
                             (0, 255, 0), 2)
        image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
        write_result_as_txt(image_name, bboxes, 'outputs/submit_invoice/')
        text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
        debug(idx, data_loader.img_paths, [[text_box]], 'data/images/tmp/')
        # recognition on the rotated rects; note result is overwritten
        # each iteration, so only the last image's text is returned
        result = crnnRec(cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB), rects)
        result = formatResult(result)
    # cmd = 'cd %s;zip -j %s %s/*' % ('./outputs/', 'submit_invoice.zip', 'submit_invoice')
    # print(cmd)
    # sys.stdout.flush()
    # util.cmd.Cmd(cmd)
    return result
def test(args):
    """Run PSENet inference over the demo dataset and dump visualizations.

    Loads the ResNet-backboned PSENet selected by args.arch from
    args.resume, binarizes the kernel maps, runs progressive scale
    expansion, and writes box files and annotated images under
    outputs/demo/.
    """
    data_loader = DemoDataLoader(long_size=args.long_size,
                                 input_path=args.input_dir)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)
    # Setup Model
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=7,
                                scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=7,
                                 scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=7,
                                 scale=args.scale)
    for param in model.parameters():
        param.requires_grad = False  # inference only
    model = model.cuda()
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print(("Loading model and optimizer from checkpoint '{}'".format(
                args.resume)))
            checkpoint = torch.load(args.resume)
            # model.load_state_dict(checkpoint['state_dict'])
            # key[7:] presumably strips the 'module.' prefix left by
            # DataParallel checkpoints — verify for non-DataParallel saves
            d = collections.OrderedDict()
            for key, value in list(checkpoint['state_dict'].items()):
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            print(("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
            sys.stdout.flush()
        else:
            print(("No checkpoint found at '{}'".format(args.resume)))
            sys.stdout.flush()
    model.eval()
    total_frame = 0.0
    total_time = 0.0
    with torch.no_grad():
        for idx, (org_img, img) in enumerate(test_loader):
            print(('progress: %d / %d' % (idx, len(test_loader))))
            sys.stdout.flush()
            img = Variable(img.cuda())
            org_img = org_img.numpy().astype('uint8')[0]
            text_box = org_img.copy()
            torch.cuda.synchronize()  # accurate GPU timing
            start = time.time()
            outputs = model(img)
            # channel 0 (full-text map): sigmoid used as confidence
            score = torch.sigmoid(outputs[:, 0, :, :])
            # binarize: sign() -> {-1, 1} -> {0, 1}
            outputs = (torch.sign(outputs - args.binary_th) + 1) / 2
            text = outputs[:, 0, :, :]
            kernels = outputs[:, 0:args.kernel_num, :, :] * text
            score = score.data.cpu().numpy()[0].astype(np.float32)
            text = text.data.cpu().numpy()[0].astype(np.uint8)
            kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)
            # c++ version pse
            pred = pse(kernels,
                       args.min_kernel_area / (args.scale * args.scale))
            # (x_ratio, y_ratio) mapping prediction coords back to org_img
            scale = (org_img.shape[1] * 1.0 / pred.shape[1],
                     org_img.shape[0] * 1.0 / pred.shape[0])
            label = pred
            label_num = np.max(label) + 1  # components + background
            bboxes = []
            for i in range(1, label_num):
                # pixels of component i as (x, y)
                points = np.array(np.where(label == i)).transpose(
                    (1, 0))[:, ::-1]
                if points.shape[0] < args.min_area / (args.scale *
                                                      args.scale):
                    continue
                score_i = np.mean(score[label == i])
                if score_i < args.min_score:
                    continue
                rect = cv2.minAreaRect(points)
                bbox = cv2.boxPoints(rect) * scale
                bbox = bbox.astype('int32')
                bboxes.append(bbox.reshape(-1))
            torch.cuda.synchronize()
            end = time.time()
            total_frame += 1
            total_time += (end - start)
            print(('fps: %.2f' % (total_frame / total_time)))
            sys.stdout.flush()
            for bbox in bboxes:
                cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1,
                                 (0, 255, 0), 10)
            image_name = data_loader.img_paths[idx].split('/')[-1].split(
                '.')[0]
            write_result_as_txt(image_name, bboxes, 'outputs/demo/')
            text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
            debug(idx, data_loader.img_paths, [[text_box]], 'outputs/demo/')
else: img = Variable(scaled_img) outputs = model(img) score = torch.sigmoid(outputs[:, 0, :, :]) outputs = (torch.sign(outputs - 1) + 1) / 2 text = outputs[:, 0, :, :] kernels = outputs[:, 0:kernel_num, :, :] * text score = score.data.numpy()[0].astype(np.float32) text = text.data.numpy()[0].astype(np.uint8) kernels = kernels.data.numpy()[0].astype(np.uint8) # c++ version pse pred = pse(kernels, min_kernel_area) # python version pse # pred = pypse(kernels, min_kernel_area) scale = (org_img.shape[1] * 1.0 / pred.shape[1], org_img.shape[0] * 1.0 / pred.shape[0]) label = pred label_num = np.max(label) + 1 bboxes = [] for i in range(1, label_num): points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1] if points.shape[0] < min_area: continue score_i = np.mean(score[label == i])
def Test():
    """Run the TensorFlow PSENet over the preloaded test set.

    Restores a checkpointed model, runs PSE post-processing per image, and
    writes annotated images to ./Images/Image_OUT/ and box files to
    ./Images/Text_OUT/.
    """
    test_data_load = data_pre.DataTest_load_pre(long_size=320)
    print("Data num: ", len(test_data_load))
    tf_image = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 3],
                              name="image")
    #############################################################################################
    ### Model logites And Model Path
    ### Self Model
    #resnet = PM.ResNet(PM.BottleBlock(), FLAGS.kernal_num, True, 1.0)
    #logites = resnet(tf_image) ## (batch, 7, size, size)
    #model_path = "./checkpoints/old/PSENet_BC-32_k3_2020-03-02-19-31-31.ckpt-192500"
    ### Model two
    logites, _ = model_v1.model(tf_image, FLAGS.kernal_num)  ## [1,3,?,?]
    model_path = "./checkpoints/0302/PSENet_BC-32_k3_2020-02-26-17-06-44.ckpt-192500"
    #############################################################################################
    saver = tf.train.Saver()
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)
    saver.restore(sess=sess, save_path=model_path)
    print("################ load model down! ##########################")
    for i in range(len(test_data_load)):
        ori_img, scaled_img = test_data_load[i]  ### (h,w,3)
        text_box = ori_img.copy()
        scaled_img = np.expand_dims(scaled_img, axis=0)  ### (1, h, w, 3)
        train_pred = sess.run([logites], feed_dict={tf_image: scaled_img})  ## [(1, 7, size, size)]
        train_pred = train_pred[0]  ### (1, k, h, w), values in [0, 1]
        #train_score = (pre_tools.sigmod(train_pred[0,0,:,:])).astype(np.float32) ## [512,512]
        mask = train_pred[:, 0, :, :]  ## [1,512,512] use the first kernel as the text mask
        kernels = train_pred[:, 0:, :, :] * mask  ## [1,3,512,512] mask the remaining kernels
        kernels = np.squeeze(kernels, 0).astype(np.uint8)  ## [3,512,512]
        ### progressive scale expansion output
        pred = pse(kernels,
                   FLAGS.min_kernel_area / (FLAGS.scale * FLAGS.scale))
        #cv2.imwrite("./Images/Image_OUT/image_Pred_2{}.jpg".format(i), pred * 255) ## dump the final label map
        # (x_ratio, y_ratio) mapping prediction coords back to ori_img
        scale = (ori_img.shape[1] * 1.0 / pred.shape[1],
                 ori_img.shape[0] * 1.0 / pred.shape[0])  ## rescale factors
        label = pred
        label_num = np.max(label) + 1  # components + background
        bboxes = []
        for j in range(1, label_num):
            #point_where = np.where(label == 1)
            try:
                # pixels of component j as (x, y)
                points = np.array(np.where(label == j)).transpose(
                    (1, 0))[:, ::-1]
            except:
                continue
            if points.shape[0] < FLAGS.min_area / (FLAGS.scale * FLAGS.scale):
                continue
            rect = cv2.minAreaRect(points)
            bbox = cv2.boxPoints(rect) * scale
            bbox = bbox.astype('int32')
            bboxes.append(bbox.reshape(-1))
        for bbox in bboxes:
            cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1,
                             (0, 255, 0), 1)
        text_box = cv2.resize(text_box, (ori_img.shape[1], ori_img.shape[0]))
        cv2.imwrite("./Images/Image_OUT/img_{}.jpg".format(i), text_box)
        pre_tools.write_result_as_txt(str(i), bboxes, './Images/Text_OUT/')
        print("Finish {} image!".format(i + 1))
bbox_list = [] for label_idx in range(1, label_num + 1): result = (pred == label_idx).astype(np.uint8) _, contours, hierarchy = cv2.findContours(result, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) for contour in contours: rect = cv2.minAreaRect(contour) point = cv2.boxPoints(rect) point[:, 0] = np.clip(point[:, 0], 0, w - 1) point[:, 1] = np.clip(point[:, 1], 0, h - 1) bbox_list.append([point[1], point[2], point[3], point[0]]) return pred, np.array(bbox_list) if __name__ == '__main__': x = np.zeros((3, 3, 3)) y = np.ones((3, 3, 3)) s1 = np.zeros((5, 5)) s2 = np.zeros((5, 5)) s3 = np.zeros((5, 5)) s1[[0, 0, 0, 0], [0, 1, 2, 3]] = 1 s2[[2, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 2]] = 1 s3[[1, 1, 1, 1], [0, 1, 2, 3]] = 1 # com = np.concatenate((x,y,x,y),axis=2) # kernels = np.stack((s1, s2, s3)).astype(np.uint8) kernels = np.load('/data1/zj/PSENet.pytorch/result.npy') tic = time.time() pred = pse(kernels, 100) print(time.time() - tic)
def pred(args):
    """Run PSENet on a single hard-coded test image and dump the results.

    Loads ./data/test/1.png, rescales its long side to 1120, runs the
    network from args.resume, and writes box files plus a visualization
    under data/test/output.
    """
    from PIL import Image
    import torchvision.transforms as transforms

    def get_img(img_path):
        # read with OpenCV (BGR) and flip channels to RGB
        try:
            img = cv2.imread(img_path)
            img = img[:, :, [2, 1, 0]]
        except Exception as e:
            print(img_path)
            raise
        return img

    def scale(img, long_size=2240):
        # resize so the longer side equals long_size, keeping aspect ratio
        h, w = img.shape[0:2]
        scale = long_size * 1.0 / max(h, w)
        img = cv2.resize(img, dsize=None, fx=scale, fy=scale)
        return img

    imgPath = './data/test/1.png'
    imgLoad = get_img(imgPath)
    scaled_img = scale(imgLoad, long_size=1120)
    scaled_img = Image.fromarray(scaled_img)
    scaled_img = scaled_img.convert('RGB')
    scaled_img = transforms.ToTensor()(scaled_img)
    # ImageNet normalization
    img = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                               std=[0.229, 0.224, 0.225])(scaled_img)
    org_img = imgLoad[:, :, [2, 1, 0]]
    # Setup Model
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=7,
                                scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=7,
                                 scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=7,
                                 scale=args.scale)
    for param in model.parameters():
        param.requires_grad = False  # inference only
    model = model.cuda()
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            # model.load_state_dict(checkpoint['state_dict'])
            # key[7:] presumably strips the 'module.' prefix left by
            # DataParallel checkpoints — verify for non-DataParallel saves
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()
    model.eval()
    total_frame = 0.0
    total_time = 0.0
    sys.stdout.flush()
    # volatile is a legacy (pre-0.4) autograd flag; kept as-is
    img = Variable(img[None, :, :, :].cuda(), volatile=True)
    #org_img = org_img.astype('uint8')[0]
    text_box = org_img.copy()
    torch.cuda.synchronize()  # accurate GPU timing
    start = time.time()
    outputs = model(img)
    # channel 0 (full-text map): sigmoid used as confidence
    score = torch.sigmoid(outputs[:, 0, :, :])
    # binarize: sign() -> {-1, 1} -> {0, 1}
    outputs = (torch.sign(outputs - args.binary_th) + 1) / 2
    text = outputs[:, 0, :, :]
    kernels = outputs[:, 0:args.kernel_num, :, :] * text
    score = score.data.cpu().numpy()[0].astype(np.float32)
    text = text.data.cpu().numpy()[0].astype(np.uint8)
    kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)
    # c++ version pse
    pred = pse(kernels, args.min_kernel_area / (args.scale * args.scale))
    # python version pse
    # pred = pypse(kernels, args.min_kernel_area / (args.scale * args.scale))
    # scale = (org_img.shape[0] * 1.0 / pred.shape[0], org_img.shape[1] * 1.0 / pred.shape[1])
    # (x_ratio, y_ratio) mapping prediction coords back to org_img
    scale = (org_img.shape[1] * 1.0 / pred.shape[1],
             org_img.shape[0] * 1.0 / pred.shape[0])
    label = pred
    label_num = np.max(label) + 1  # components + background
    bboxes = []
    for i in range(1, label_num):
        # pixels of component i as (x, y)
        points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]
        if points.shape[0] < args.min_area / (args.scale * args.scale):
            continue
        score_i = np.mean(score[label == i])
        if score_i < args.min_score:
            continue
        rect = cv2.minAreaRect(points)
        bbox = cv2.boxPoints(rect) * scale
        bbox = bbox.astype('int32')
        bboxes.append(bbox.reshape(-1))
    torch.cuda.synchronize()
    end = time.time()
    total_frame += 1
    total_time += (end - start)
    print('fps: %.2f' % (total_frame / total_time))
    sys.stdout.flush()
    for bbox in bboxes:
        cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2)
    write_result_as_txt('1', bboxes, 'data/test/output')
    text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
    debug(0, [imgPath], [[text_box]], 'data/test/output')
def use_psenet(img, model, precession=960, kernel_num=7, min_kernel_area=5.0,
               min_area=800, min_score=0.93):
    """Detect text boxes in a BGR image with a PSENet model.

    The image is flipped to RGB, resized so its long side equals
    `precession`, ImageNet-normalized and pushed through the network; the
    binarized kernel maps are grown via progressive scale expansion and
    filtered by component area and mean sigmoid score.

    Returns:
        list of flat int32 arrays, one 4-corner box per text region, in
        the original image's coordinates.
    """
    org_img = img[:, :, [2, 1, 0]]
    h, w = org_img.shape[0:2]
    resize_factor = precession * 1.0 / max(h, w)
    resized = cv2.resize(org_img, dsize=None, fx=resize_factor,
                         fy=resize_factor)
    tensor = transforms.ToTensor()(Image.fromarray(resized).convert('RGB'))
    tensor = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])(tensor)
    tensor = tensor.unsqueeze(0)
    text_box = org_img.copy()
    with torch.no_grad():
        net_in = Variable(tensor.cuda())
        torch.cuda.synchronize()
        raw_out = model(net_in)
        # channel 0 (full-text map): sigmoid used as confidence
        prob = torch.sigmoid(raw_out[:, 0, :, :])
        # binarize at 1: sign() -> {-1, 1} -> {0, 1}
        binarized = (torch.sign(raw_out - 1) + 1) / 2
        text_map = binarized[:, 0, :, :]
        kernel_maps = binarized[:, 0:kernel_num, :, :] * text_map
        prob = prob.data.cpu().numpy()[0].astype(np.float32)
        text_map = text_map.data.cpu().numpy()[0].astype(np.uint8)
        kernel_maps = kernel_maps.data.cpu().numpy()[0].astype(np.uint8)
        # c++ version pse
        label_map = pse(kernel_maps, min_kernel_area)
        # python version pse
        # label_map = pypse(kernel_maps, min_kernel_area)
        # (x_ratio, y_ratio) mapping prediction coords back to org_img
        ratio = (org_img.shape[1] * 1.0 / label_map.shape[1],
                 org_img.shape[0] * 1.0 / label_map.shape[0])
        label_num = np.max(label_map) + 1
        bboxes = []
        for i in range(1, label_num):
            # pixels of component i as (x, y)
            pts = np.array(np.where(label_map == i)).transpose((1, 0))[:, ::-1]
            if pts.shape[0] < min_area:
                continue
            if np.mean(prob[label_map == i]) < min_score:
                continue
            quad = cv2.boxPoints(cv2.minAreaRect(pts)) * ratio
            bboxes.append(quad.astype('int32').reshape(-1))
        torch.cuda.synchronize()
    return bboxes
def test(args):
    """Evaluate PSENet on the IC15/LSVT test set and write contour results.

    Loads the checkpoint from args.resume, binarizes the kernel maps, runs
    progressive scale expansion, extracts each component's outer contour,
    rescales it to the original image, and writes box files under
    outputs/submit_LSVT/ plus visualizations under outputs/vis_LSVT/.

    Fixes over the previous revision:
      * the cv2.findContours call was commented out, leaving `contours`
        undefined (NameError on the first kept component); restored with
        version-agnostic [-2] indexing (OpenCV 3 returns 3 values,
        OpenCV 4 returns 2);
      * `bbox.reshape(bbox.shape[0] / 2, 2)` used float division, which
        ndarray.reshape rejects under Python 3 — now integer division.
    """
    data_loader = IC15TestLoader(long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)
    # Setup Model
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=False, num_classes=7,
                                scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=7,
                                 scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=7,
                                 scale=args.scale)
    for param in model.parameters():
        param.requires_grad = False  # inference only
    model = model.cuda()
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            # key[7:] presumably strips the 'module.' prefix left by
            # DataParallel checkpoints — verify for non-DataParallel saves
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()
    model.eval()
    total_frame = 0.0
    total_time = 0.0
    with torch.no_grad():
        for idx, (org_img, img) in enumerate(test_loader):
            print('progress: %d / %d' % (idx, len(test_loader)))
            sys.stdout.flush()
            img = Variable(img.cuda())
            org_img = org_img.numpy().astype('uint8')[0]
            text_box = org_img.copy()
            torch.cuda.synchronize()  # accurate GPU timing
            start = time.time()
            outputs = model(img)
            # channel 0 (full-text map): sigmoid used as confidence
            score = torch.sigmoid(outputs[:, 0, :, :])
            # binarize: sign() -> {-1, 1} -> {0, 1}
            outputs = (torch.sign(outputs - args.binary_th) + 1) / 2
            text = outputs[:, 0, :, :]
            kernels = outputs[:, 0:args.kernel_num, :, :] * text
            score = score.data.cpu().numpy()[0].astype(np.float32)
            text = text.data.cpu().numpy()[0].astype(np.uint8)
            kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)
            # c++ version pse; pred labels each connected component
            pred = pse(kernels,
                       args.min_kernel_area / (args.scale * args.scale))
            # NOTE(review): this is (y_ratio, x_ratio) while contour points
            # are (x, y) — looks transposed relative to the other variants
            # in this file; kept as-is, verify against expected output.
            scale = (org_img.shape[0] * 1.0 / pred.shape[0],
                     org_img.shape[1] * 1.0 / pred.shape[1])
            label = pred
            label_num = np.max(label) + 1  # components + background
            bboxes = []
            for i in range(1, label_num):
                # pixels belonging to connected component i, as (x, y)
                points = np.array(np.where(label == i)).transpose(
                    (1, 0))[:, ::-1]
                if points.shape[0] < args.min_area / (args.scale *
                                                      args.scale):
                    continue
                score_i = np.mean(score[label == i])
                if score_i < args.min_score:  # score threshold
                    continue
                binary = np.zeros(label.shape, dtype='uint8')
                binary[label == i] = 1
                # FIX: restore the findContours call (it was commented out,
                # leaving `contours` undefined); [-2] works on OpenCV 3 & 4
                contours = cv2.findContours(binary, cv2.RETR_TREE,
                                            cv2.CHAIN_APPROX_SIMPLE)[-2]
                contour = contours[0]
                bbox = contour
                if bbox.shape[0] <= 2:
                    continue  # degenerate outline
                bbox = bbox * scale
                bbox = bbox.astype('int32')
                bboxes.append(bbox.reshape(-1))
            torch.cuda.synchronize()
            end = time.time()
            total_frame += 1
            total_time += (end - start)
            print('fps: %.2f' % (total_frame / total_time))
            sys.stdout.flush()
            for bbox in bboxes:
                # FIX: integer division — reshape rejects float dimensions
                cv2.drawContours(text_box,
                                 [bbox.reshape(bbox.shape[0] // 2, 2)], -1,
                                 (0, 255, 0), 2)
            image_name = data_loader.img_paths[idx].split('/')[-1].split(
                '.')[0]
            write_result_as_txt(image_name, bboxes, 'outputs/submit_LSVT/')
            text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
            debug(idx, data_loader.img_paths, [[text_box]],
                  'outputs/vis_LSVT/')