def obj_inference_crop(obj_inferece, img, resize_scale):
    """Detect one rotated text region in ``img`` and return its cropped word image.

    Runs the detector's ``inference_single``, keeps the highest-confidence
    detection of the first class, rotates the image by 90 degrees when the
    box is taller than it is wide (so the text lies horizontally), then
    crops the word region with ``crop_words``.

    Args:
        obj_inferece: detector object exposing ``inference_single(img)``.
        img: input image, as accepted by the detector and ``crop_words``.
        resize_scale: divisor applied to the box coordinates before
            cropping; also passed as the crop ``height``.

    Returns:
        The single cropped word image, or ``None`` when the detector
        produced no detections (result of shape ``(0, 9)``).
    """
    roitransformer = obj_inferece
    det = roitransformer.inference_single(img)
    # Detector output is per-class; keep only the first class.
    det = det[0]
    # If this class has several detections, keep the one with the highest
    # confidence (last column) and use the first 8 values as the four
    # corner coordinates.
    # print('det:{}'.format(det))
    # assert det==np.empty((0,9))
    if det.shape != (0, 9):
        # print(det)
        rgt_det = det[np.argmax(det[:, -1]), :-1]
        # Adjust according to the aspect ratio.
        # xc, yc=np.mean(rgt_det[0::2]),np.mean(rgt_det[1::2])
        _, reranked_nodes = re_rank(rgt_det)
        # re_rank yields a list of corner points of shape (-1, 2).
        reranked_nodes = np.array(reranked_nodes)
        near_w = np.abs(reranked_nodes[0, 0] - reranked_nodes[1, 0])
        near_h = np.abs(reranked_nodes[0, 1] - reranked_nodes[2, 1])
        # Rotate the coordinates and the image so the text is upright (or
        # upside down) but never lying on its side.
        if near_h < near_w:
            pass
        else:
            imgRotation, matRotation = DumpRotateImage(img, 90)
            for i in range(len(reranked_nodes)):
                points_arr = reranked_nodes[i, :]
                # Apply the affine rotation matrix to each corner point
                # (homogeneous [x, y, 1] column vector).
                reranked_nodes[i, :] = np.dot(
                    matRotation,
                    np.array([[points_arr[0]], [points_arr[1]], [1]])).reshape(-1)
            img = imgRotation
        rgt_det = [reranked_nodes.reshape(-1)]
        words = crop_words(img, np.array(rgt_det) / resize_scale,
                           height=resize_scale, grayscale=False)
        return words[0]
    else:
        print('???')
        # rgt_det = det
        return None
def crop_boxes(self, result, grayscale=True):
    """Extract word crops for the detections in ``result``.

    Converts each raw detection (columns 12:17 carry the rotated-box
    parameters consumed by ``rbox3_to_polygon``) to a quadrilateral,
    re-fits a canonical rbox, drops boxes that fall outside the image or
    too close to its edges (the RoI sits at the centre of the image), and
    crops the surviving regions from ``self.resized_image``.

    Args:
        result: detection array; one row per detection, coordinates
            normalised to [0, 1].
        grayscale: forwarded to ``crop_words``.

    Returns:
        ``(words, boxes, 1)`` on success — ``words`` is an array of
        transposed crops, ``boxes`` the kept quadrilaterals reshaped to
        ``(-1, 8)``; ``(-1, -1, -1)`` when there are no detections or
        nothing survives filtering.
    """
    img = self.resized_image
    if len(result) == 0:
        return -1, -1, -1
    rboxes = result[:, 12:17]
    boxes = np.asarray([rbox3_to_polygon(r) for r in rboxes])
    # Re-fit rboxes from the polygons to obtain a canonical form.
    rboxes = np.array([polygon_to_rbox(b)
                       for b in np.reshape(boxes, (-1, 4, 2))])
    # NOTE: margin around boxes intentionally disabled; to restore use
    #   bh = rboxes[:, 3]; rboxes[:, 2] += bh * 0.1; rboxes[:, 3] += bh * 0.2
    boxes = np.array([rbox_to_polygon(f) for f in rboxes])
    boxes = np.flip(boxes, axis=1)
    boxes = np.reshape(boxes, (-1, 8))
    # Keep only boxes fully inside the image ...
    inside_mask = np.array([not (np.any(b < 0) or np.any(b > 1))
                            for b in boxes])
    # ... whose first vertex is away from the edges (detections near the
    # border are discarded because the RoI is centred in the image).
    centre_mask = np.array([not (np.any(b[:2] < 0.2) or np.any(b[:2] > 0.8))
                            for b in boxes])
    boxes = boxes[inside_mask & centre_mask]
    if len(boxes) == 0:
        return -1, -1, -1
    # BUG FIX: honour the caller's ``grayscale`` argument instead of the
    # previously hard-coded ``grayscale=True``.
    words = crop_words(img, boxes, self.input_height,
                       width=self.input_width, grayscale=grayscale)
    words = np.asarray([w.transpose(1, 0, 2) for w in words])
    return words, boxes, 1
boxes = boxes[boxes_mask] rboxes = rboxes[boxes_mask] if len(boxes) == 0: boxes = np.empty((0,8)) #for b in boxes: # xy = b.reshape((-1,1,2)) / input_size * [vid_w, vid_h] # xy = np.round(xy) # xy = xy.astype(np.int32) # cv2.polylines(img1, [xy], True, (0,0,255)) boxes = np.clip(boxes/512, 0, 1) words = crop_words(img, boxes, input_height, width=input_width, grayscale=True) words = np.asarray([w.transpose(1,0,2) for w in words]) if len(words) > 0: res_crnn = rec_model.predict(words) xy = rboxes[:,:2] xy[:,0] = xy[:,0] - rboxes[:,2] / 2 xy = xy / input_size * [vid_w, vid_h] for i in range(len(words)): idxs = np.argmax(res_crnn[i], axis=1) confs = res_crnn[i][range(len(idxs)),idxs] non_blank_mask = idxs != len(alphabet)-1 if np.any(non_blank_mask):
boxes[:, 0::2] /= img_width boxes[:, 1::2] /= img_height boxes = np.concatenate([boxes, np.ones([boxes.shape[0], 1])], axis=1) boxes = np.copy(boxes[:, :-1]) # drop boxes with vertices outside the image mask = np.array([not (np.any(b < 0.) or np.any(b > 1.)) for b in boxes]) boxes = boxes[mask] if len(boxes) == 0: continue try: words = crop_words(img, boxes, input_height, input_width, True) except Exception as e: import traceback print(traceback.format_exc()) print(fname) continue mask = np.array([w.shape[1] > w.shape[0] for w in words]) words = [words[j] for j in range(len(words)) if mask[j]] if len(words) == 0: continue idxs_words = np.arange(len(words)) np.random.shuffle(idxs_words) words = [words[j] for j in idxs_words]