def get_det_boxes(image, display=True):
    """Run the CTPN model on *image* and return detected text lines.

    The image width is clamped to [min_width, max_width] (module globals)
    via an aspect-preserving resize, normalized by ``config.IMAGE_MEAN``,
    and pushed through the global ``model``.  Decoded proposals are
    score-thresholded, width-filtered, NMS-merged and connected into
    oriented text lines.

    Args:
        image: BGR image as a numpy array of shape (H, W, 3).
        display: when True, draw each detected quadrilateral and its
            confidence onto a copy of the resized image.

    Returns:
        (text, image_c, image_r) where
        text    -- text lines, each row [x0,y0,x1,y1,x2,y2,x3,y3,score]
                   in the coordinate frame of the *resized* image;
        image_c -- resized image, annotated when ``display`` is True;
        image_r -- clean copy of the resized image.
    """
    # Only the width drives the resize decision; the original code also
    # computed an unused `h` here that was immediately recomputed below.
    w = image.shape[1]
    if w > max_width:
        image = resize(image, width=max_width)
    elif w < min_width:
        image = resize(image, width=min_width)

    image_r = image.copy()  # pristine copy returned to the caller
    image_c = image.copy()  # copy that receives the drawn detections
    h, w = image.shape[:2]

    # Mean-subtract, then HWC -> 1xCxHxW float tensor.
    image = image.astype(np.float32) - config.IMAGE_MEAN
    image = torch.from_numpy(image.transpose(2, 0, 1)).unsqueeze(0).float()

    with torch.no_grad():
        image = image.to(device)
        cls, regr = model(image)
        cls_prob = F.softmax(cls, dim=-1).cpu().numpy()
        regr = regr.cpu().numpy()

        # CTPN uses a 16px feature stride: one anchor column per 16 pixels.
        anchor = gen_anchor((int(h / 16), int(w / 16)), 16)
        bbox = bbox_transfor_inv(anchor, regr)
        bbox = clip_box(bbox, [h, w])

        # Keep proposals whose text-class probability exceeds the threshold.
        fg = np.where(cls_prob[0, :, 1] > prob_thresh)[0]
        select_anchor = bbox[fg, :].astype(np.int32)
        select_score = cls_prob[0, fg, 1]

        # Drop degenerate proposals, then run NMS on [x1,y1,x2,y2,score].
        keep_index = filter_bbox(select_anchor, 16)
        select_anchor = select_anchor[keep_index]
        select_score = select_score[keep_index]
        select_score = np.reshape(select_score, (select_score.shape[0], 1))
        nmsbox = np.hstack((select_anchor, select_score))
        keep = nms(nmsbox, 0.3)
        select_anchor = select_anchor[keep]
        select_score = select_score[keep]

        # Connect surviving vertical proposals into full oriented text lines.
        textConn = TextProposalConnectorOriented()
        text = textConn.get_text_lines(select_anchor, select_score, [h, w])

        if display:
            for i in text:
                s = str(round(i[-1] * 100, 2)) + '%'
                i = [int(j) for j in i]
                # Draw the quadrilateral edge by edge.
                cv2.line(image_c, (i[0], i[1]), (i[2], i[3]), (0, 0, 255), 2)
                cv2.line(image_c, (i[0], i[1]), (i[4], i[5]), (0, 0, 255), 2)
                cv2.line(image_c, (i[6], i[7]), (i[2], i[3]), (0, 0, 255), 2)
                cv2.line(image_c, (i[4], i[5]), (i[6], i[7]), (0, 0, 255), 2)
                cv2.putText(image_c, s, (i[0] + 13, i[1] + 13),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2,
                            cv2.LINE_AA)

        return text, image_c, image_r
def inference(self, image):
    """Detect text lines in *image*, returned in original-image coordinates.

    The image is resized to the fixed network input height, run through
    ``self.model``, and the resulting proposals are thresholded, filtered,
    NMS-merged and connected into text lines.  Line coordinates are then
    scaled back up by the resize factor.
    """
    # Resize to the fixed input height, keeping the aspect ratio.
    image_sz = resize(image, height=ctpn_params.IMAGE_HEIGHT)
    # Uniform factor that maps detections back onto the original image.
    rescale_fac = image.shape[0] / image_sz.shape[0]
    h, w = image_sz.shape[:2]

    # Mean-subtract and build a 1xCxHxW float tensor.
    image_sz = image_sz.astype(np.float32) - ctpn_params.IMAGE_MEAN
    image_sz = torch.from_numpy(
        image_sz.transpose(2, 0, 1)).unsqueeze(0).float()
    if self.use_gpu:
        image_sz = image_sz.cuda()

    cls, regr = self.model(image_sz)
    cls_prob = F.softmax(cls, dim=-1).cpu().numpy()
    regr = regr.cpu().numpy()

    # Decode anchors at the 16px feature stride and clip to image bounds.
    anchor = gen_anchor((int(h / 16), int(w / 16)), 16)
    bbox = clip_box(bbox_transfor_inv(anchor, regr), [h, w])

    # Keep proposals whose text-class probability beats the threshold.
    fg = np.where(cls_prob[0, :, 1] > self.prob_thresh)[0]
    boxes = bbox[fg, :].astype(np.int32)
    scores = cls_prob[0, fg, 1]

    # Width-filter, then NMS on [x1, y1, x2, y2, score] rows.
    keep_index = filter_bbox(boxes, 16)
    boxes = boxes[keep_index]
    scores = np.reshape(scores[keep_index], (-1, 1))
    keep = nms(np.hstack((boxes, scores)), 0.3)
    boxes = boxes[keep]
    scores = scores[keep]

    # Join proposals into oriented text lines, then map back to full scale.
    connector = TextProposalConnectorOriented()
    lines = connector.get_text_lines(boxes, scores, [h, w])
    return [np.hstack((res[:8] * rescale_fac, res[8])) for res in lines]
def dis(image):
    """Show *image* in a blocking OpenCV window until a key is pressed."""
    cv2.imshow('image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Collect every file in the configured image directory.
filenames = [os.path.join(config.img_path, file)
             for file in os.listdir(config.img_path)]
print(filenames)

for k in range(len(filenames)):
    # BUG FIX: the original read `filenames[i]` inside a loop over `k`,
    # which raises NameError (or silently reuses a stale `i`).
    image = cv2.imread(filenames[k])
    image = resize(image, width=width)
    image_c = image.copy()
    h, w = image.shape[:2]
    # Mean-subtract, then HWC -> 1xCxHxW float tensor.
    image = image.astype(np.float32) - config.IMAGE_MEAN
    image = torch.from_numpy(image.transpose(2, 0, 1)).unsqueeze(0).float()
    with torch.no_grad():
        image = image.to(device)
        cls, regr = model(image)
        cls_prob = F.softmax(cls, dim=-1).cpu().numpy()
        regr = regr.cpu().numpy()
        # Decode proposals at the 16px feature stride.
        anchor = gen_anchor((int(h / 16), int(w / 16)), 16)
        bbox = bbox_transfor_inv(anchor, regr)
# NOTE(review): this fragment is the tail of an RPN `forward` method whose
# `def` header is outside this view, followed by module-level script code;
# it is reproduced with comments only.

x = x3  # presumably the last backbone/BiLSTM feature map — def not visible
cls = self.rpn_class(x)      # per-location classification head
regr = self.rpn_regress(x)   # per-location regression head
# NCHW -> NHWC, then flatten the spatial grid: 10 anchors per location,
# 2 values each (class pair / regression pair).
cls = cls.permute(0,2,3,1).contiguous()
regr = regr.permute(0,2,3,1).contiguous()
cls = cls.view(cls.size(0), cls.size(1)*cls.size(2)*10, 2)
regr = regr.view(regr.size(0), regr.size(1)*regr.size(2)*10, 2)
return cls, regr

image = cv2.imread(img_path)
# NOTE(review): width=width AND height=width forces a square resize —
# confirm this is intended rather than width-only aspect-preserving scaling.
image = resize(image, width=width,height=width)
image_c = image.copy()
h, w = image.shape[:2]
# Mean-subtract, then HWC -> 1xCxHxW float tensor.
image = image.astype(np.float32) - config.IMAGE_MEAN
image = torch.from_numpy(image.transpose(2, 0, 1)).unsqueeze(0).float()

# Load trained CTPN weights (non-strict: tolerates missing/extra keys)
# and switch the model to inference mode on the configured device.
model = CTPN_Model()
model.load_state_dict(torch.load(weights, map_location=device)['model_state_dict'],strict=False)
model.to(device)
model.eval()

def dis(image):
    """Display a BGR *image* with matplotlib by reversing the channel order."""
    plt.imshow(image[:,:,::-1])
    plt.show()