def textline_extract(image, prediction, threshold=0.3):
    h, w, _ = image.shape
    cls = np.array(prediction[0])
    regr = np.array(prediction[1])
    cls_prod = np.array(prediction[2])

    anchor = utils.gen_anchor((int(h / 16), int(w / 16)), 16)
    bbox = utils.bbox_transfor_inv(anchor, regr)
    bbox = utils.clip_box(bbox, [h, w])

    # keep proposals whose text score exceeds the threshold
    fg = np.where(cls_prod[0, :, 1] > threshold)[0]
    select_anchor = bbox[fg, :]
    select_score = cls_prod[0, fg, 1]
    select_anchor = select_anchor.astype('int32')

    # filter by size
    keep_index = utils.filter_bbox(select_anchor, 16)

    # nms
    select_anchor = select_anchor[keep_index]
    select_score = select_score[keep_index]
    select_score = np.reshape(select_score, (select_score.shape[0], 1))
    nmsbox = np.hstack((select_anchor, select_score))
    keep = utils.nms(nmsbox, 0.3)
    select_anchor = select_anchor[keep]
    select_score = select_score[keep]

    # text line
    textConn = text_connect.TextProposalConnector()
    text = textConn.get_text_lines(select_anchor, select_score, [h, w])
    text = list(text.astype('int32'))
    return text
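# Minimal usage sketch for the function above; "model" is an assumed, already-loaded CTPN
# Keras model whose predict() returns [cls, regr, cls_prod] for a batch, and "sample.jpg"
# is a placeholder path. The exact layout of each returned line comes from
# text_connect.TextProposalConnector.
import cv2
import numpy as np

image = cv2.imread("sample.jpg")
m_img = np.expand_dims(image - utils.IMAGE_MEAN, axis=0)   # zero-center and add a batch axis
prediction = model.predict(m_img)
lines = textline_extract(image, prediction, threshold=0.7)
for line in lines:
    print(line)  # corner coordinates of one text line followed by its score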
def __call__(self, img, bboxes):
    # choose random translation factors
    img_shape = img.shape

    # translate the image
    # percentage of the dimension of the image to translate
    translate_factor_x = random.uniform(*self.translate)
    translate_factor_y = random.uniform(*self.translate)

    if not self.diff:
        translate_factor_y = translate_factor_x

    canvas = np.zeros(img_shape).astype(np.uint8)

    corner_x = int(translate_factor_x * img.shape[1])
    corner_y = int(translate_factor_y * img.shape[0])

    # change the origin to the top-left corner of the translated box
    orig_box_cords = [max(0, corner_y), max(corner_x, 0),
                      min(img_shape[0], corner_y + img.shape[0]),
                      min(img_shape[1], corner_x + img.shape[1])]

    mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]),
               max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]), :]
    canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3], :] = mask
    img = canvas

    # bboxes[:,:4] += [corner_x, corner_y, corner_x, corner_y]
    bboxes = bboxes.astype(np.float64)
    bboxes += np.array([corner_x, corner_y, corner_x, corner_y])

    bboxes = clip_box(bboxes, [0, 0, img_shape[1], img_shape[0]], 0.25)

    return img, bboxes
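# Minimal usage sketch; RandomTranslate is an assumed class name for the augmentation whose
# __call__ is shown above, with self.translate = (low, high) fractions and a self.diff flag.
# Boxes are N x 4 pixel coordinates (x1, y1, x2, y2), matching the 4-column arithmetic above.
import numpy as np

aug = RandomTranslate(translate=(0.1, 0.2), diff=True)
img = np.zeros((480, 640, 3), dtype=np.uint8)
bboxes = np.array([[50.0, 60.0, 200.0, 180.0]])
img_t, bboxes_t = aug(img, bboxes)  # pixels and boxes shift by the same offset, then boxes are clipped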
def __call__(self, img, bboxes):
    angle = random.uniform(*self.angle)

    w, h = img.shape[1], img.shape[0]
    cx, cy = w // 2, h // 2

    img = rotate_im(img, angle)

    corners = get_corners(bboxes)
    corners = np.hstack((corners, bboxes[:, 4:]))
    corners[:, :8] = rotate_box(corners[:, :8], angle, cx, cy, h, w)

    new_bbox = get_enclosing_box(corners).astype(np.float64)

    scale_factor_x = img.shape[1] / w
    scale_factor_y = img.shape[0] / h

    img = cv2.resize(img, (w, h))

    new_bbox[:, :4] /= np.array(
        [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y])

    bboxes = new_bbox
    bboxes = clip_box(bboxes, [0, 0, w, h], 0.25)

    return img, bboxes
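# Hedged sketch of the enclosing-box step used above: given per-box corners laid out as
# (x1, y1, x2, y2, x3, y3, x4, y4, ...extra columns), take the min/max over x and y to get
# the axis-aligned box containing the rotated rectangle. This illustrates what
# get_enclosing_box is assumed to do; the project's own helper may differ in details.
import numpy as np

def enclosing_box_sketch(corners):
    x_ = corners[:, [0, 2, 4, 6]]
    y_ = corners[:, [1, 3, 5, 7]]
    xmin = x_.min(axis=1).reshape(-1, 1)
    ymin = y_.min(axis=1).reshape(-1, 1)
    xmax = x_.max(axis=1).reshape(-1, 1)
    ymax = y_.max(axis=1).reshape(-1, 1)
    return np.hstack((xmin, ymin, xmax, ymax, corners[:, 8:]))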
def proposal_graph(proposal_count, nms_threshold, config, batch_size, cls_prob, deltas, anchors):
    # (num_batch, N, 4)
    deltas = deltas * tf.reshape(tf.constant(config.RPN_BBOX_STD_DEV, tf.float32), (1, 1, 4))
    scores = cls_prob[:, :, 1]
    indices = tf.nn.top_k(scores,
                          k=tf.minimum(config.PRE_NMS_PROPOSALS_INFERENCE, tf.shape(anchors)[0]),
                          sorted=True).indices
    # from here on, keep only the entries selected by indices
    scores = utils.batch_slice((scores, indices), batch_size, tf.gather)
    deltas = utils.batch_slice((deltas, indices), batch_size, tf.gather)
    anchors = utils.batch_slice((indices,), batch_size, lambda x: tf.gather(anchors, x))

    # pre_nms_box = utils.apply_deltas(anchors, deltas)
    pre_nms_boxes = utils.batch_slice((anchors, deltas), batch_size, utils.apply_deltas)

    windows = np.array([0, 0, 1, 1], dtype=np.float32)
    pre_nms_boxes = utils.batch_slice((pre_nms_boxes,), batch_size,
                                      lambda x: utils.clip_box(x, windows))
    tf.function()

    # nms_indices = tf.image.non_max_suppression(pre_nms_box, scores, max_k, iou_threshold=0.5)
    def nms(pre_nms_box, scores):
        # indices = utils.non_maximum_suppression(pre_nms_box, scores, proposal_count, iou_min=0.5, sorted=True)
        # indices = tf.image.non_max_suppression(pre_nms_box, scores, proposal_count, nms_threshold, name="rpn_non_max_suppression")
        indices = utils.non_max_suppression(pre_nms_box, scores, proposal_count, nms_threshold,
                                            name="rpn_non_max_suppression")
        proposals = tf.gather(pre_nms_box, indices)
        num_pad = tf.maximum(proposal_count - tf.shape(proposals)[0], 0)
        proposals = tf.pad(proposals, [(0, num_pad), (0, 0)])
        proposals = tf.gather(proposals, tf.range(proposal_count))
        return proposals

    proposals = utils.batch_slice((pre_nms_boxes, scores), batch_size, nms)
    return proposals, pre_nms_boxes
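# proposal_graph clips proposals to the normalized window [0, 0, 1, 1]. Below is a hedged
# sketch of what utils.clip_box is assumed to do in that convention, with boxes stored as
# normalized (y1, x1, y2, x2); the project's actual helper may differ.
import numpy as np
import tensorflow as tf

def clip_box_normalized_sketch(boxes, window):
    # window: [wy1, wx1, wy2, wx2]; boxes: (N, 4) in the same order
    wy1, wx1, wy2, wx2 = tf.split(tf.cast(window, tf.float32), 4)
    y1, x1, y2, x2 = tf.split(boxes, 4, axis=1)
    y1 = tf.maximum(tf.minimum(y1, wy2), wy1)
    x1 = tf.maximum(tf.minimum(x1, wx2), wx1)
    y2 = tf.maximum(tf.minimum(y2, wy2), wy1)
    x2 = tf.maximum(tf.minimum(x2, wx2), wx1)
    return tf.concat([y1, x1, y2, x2], axis=1)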
def __call__(self, img, bboxes):
    # choose a random scale factor
    img_shape = img.shape

    if self.diff:
        scale_x = random.uniform(*self.scale)
        scale_y = random.uniform(*self.scale)
    else:
        scale_x = random.uniform(*self.scale)
        scale_y = scale_x

    resize_scale_x = 1 + scale_x
    resize_scale_y = 1 + scale_y

    img = cv2.resize(img, None, fx=resize_scale_x, fy=resize_scale_y)

    # bboxes[:,:4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y]
    bboxes = bboxes.astype(np.float64)
    bboxes *= np.array([resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y])

    canvas = np.zeros(img_shape, dtype=np.uint8)

    y_lim = int(min(resize_scale_y, 1) * img_shape[0])
    x_lim = int(min(resize_scale_x, 1) * img_shape[1])
    # print(y_lim, x_lim)

    canvas[:y_lim, :x_lim, :] = img[:y_lim, :x_lim, :]
    img = canvas

    bboxes = clip_box(bboxes, [0, 0, 1 + img_shape[1], img_shape[0]], 0.25)

    return img, bboxes
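# The augmentation __call__ methods above all finish with clip_box(bboxes, window, 0.25).
# Hedged sketch of the assumed contract: clamp each box to the window
# [x_min, y_min, x_max, y_max] and drop boxes that keep less than alpha of their original
# area. This is only an illustration of that assumption, not the project's helper.
import numpy as np

def clip_box_sketch(bbox, window, alpha):
    area_before = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])
    x_min = np.maximum(bbox[:, 0], window[0]).reshape(-1, 1)
    y_min = np.maximum(bbox[:, 1], window[1]).reshape(-1, 1)
    x_max = np.minimum(bbox[:, 2], window[2]).reshape(-1, 1)
    y_max = np.minimum(bbox[:, 3], window[3]).reshape(-1, 1)
    clipped = np.hstack((x_min, y_min, x_max, y_max, bbox[:, 4:]))
    area_after = np.clip(x_max - x_min, 0, None) * np.clip(y_max - y_min, 0, None)
    keep = (area_after.reshape(-1) / np.maximum(area_before, 1e-8)) > alpha
    return clipped[keep]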
def refine_detection_graph(rois, cls_prob, deltas, window, config):
    # class id with the largest cls_prob (one per ROI)
    cls_ids = tf.cast(tf.argmax(cls_prob, axis=1), tf.int32)
    # ((0, i0), (1, i1), (2, i2), ...)
    indices = tf.stack([tf.range(tf.shape(cls_ids)[0]), cls_ids], axis=1)
    cls_scores = tf.gather_nd(cls_prob, indices)
    # use the deltas of the class with the largest cls_prob
    cls_deltas = tf.gather_nd(deltas, indices)
    # rois are the proposals; boxes are the proposals after applying the deltas
    boxes = utils.apply_deltas(rois, cls_deltas * config.BBOX_STD_DEV)
    boxes = utils.clip_box(boxes, window)

    # drop the background class
    keep = tf.where(cls_ids > 0)[:, 0]
    if config.MIN_SCORE:
        keep_2 = tf.where(cls_scores > config.MIN_SCORE)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), tf.expand_dims(keep_2, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    pre_nms_cls_ids = tf.gather(cls_ids, keep)
    pre_nms_scores = tf.gather(cls_scores, keep)
    pre_nms_boxes = tf.gather(boxes, keep)
    unique_class_ids = tf.unique(pre_nms_cls_ids)[0]

    def nms_cls(cls_id):
        # ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))
        # index within the pre_nms arrays
        ixs = tf.where(tf.equal(pre_nms_cls_ids, cls_id))[:, 0]
        # index within ixs
        cls_keep = tf.image.non_max_suppression(
            tf.gather(pre_nms_boxes, ixs),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=config.MAX_NUM_ROIS,
            iou_threshold=config.DETECTION_NMS_THRESHOLD)
        cls_keep = tf.gather(ixs, cls_keep)
        cls_keep = tf.gather(keep, cls_keep)
        gap = config.MAX_NUM_ROIS - tf.shape(cls_keep)[0]
        cls_keep = tf.pad(cls_keep, [(0, gap)], mode="CONSTANT", constant_values=-1)
        cls_keep.set_shape([config.MAX_NUM_ROIS])
        return cls_keep

    nms_keep = tf.map_fn(nms_cls, unique_class_ids, dtype=tf.int64)
    nms_keep = tf.reshape(nms_keep, (-1,))
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    keep = tf.sets.set_intersection(tf.expand_dims(nms_keep, 0), tf.expand_dims(keep, 0))
    keep = tf.sparse_tensor_to_dense(keep)[0]

    roi_count = config.MAX_NUM_ROIS
    class_score_keep = tf.gather(cls_scores, keep)
    num_keep = tf.minimum(roi_count, tf.shape(class_score_keep)[0])
    top_ids = tf.nn.top_k(class_score_keep, num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    detected_boxes = tf.gather(boxes, keep)
    detected_cls_ids = tf.gather(cls_ids, keep)
    detected_cls_scores = tf.gather(cls_scores, keep)
    detections = tf.concat([detected_boxes,
                            tf.to_float(detected_cls_ids)[..., tf.newaxis],
                            detected_cls_scores[..., tf.newaxis]], axis=1)
    gap = roi_count - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections
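# Hedged sketch of consuming the output above: refine_detection_graph returns a zero-padded
# (config.MAX_NUM_ROIS, 6) tensor per image laid out as [y1, x1, y2, x2, class_id, score];
# the coordinate order is an assumption carried over from the normalized-window convention
# in proposal_graph. Padded rows can be dropped via class_id > 0, since background (id 0)
# was already excluded.
import tensorflow as tf

def unpack_detections(detections):
    boxes = detections[:, :4]
    class_ids = tf.cast(detections[:, 4], tf.int32)
    scores = detections[:, 5]
    valid = tf.where(class_ids > 0)[:, 0]
    return tf.gather(boxes, valid), tf.gather(class_ids, valid), tf.gather(scores, valid)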
def textline_extract(self, image):
    assert len(image.shape) == 3
    h, w, c = image.shape

    # zero-center by mean pixel
    m_img = image - utils.IMAGE_MEAN
    # m_img = np.expand_dims(m_img, axis=0)

    p_data0 = {'inputs_1': m_img}
    param = {"instances": [p_data0]}
    predict_request = json.dumps(param, cls=NumpyEncoder)
    start = time.time()
    response = requests.post(self.server_url, data=predict_request)
    response.raise_for_status()
    prediction = response.json()['predictions'][0]

    # result = self.basemodel.predict(m_img)
    cls = np.array(prediction['output0'])
    regr = np.array(prediction['output1'])
    cls_prod = np.array(prediction['output2'])
    cls = np.expand_dims(cls, axis=0)
    regr = np.expand_dims(regr, axis=0)
    cls_prod = np.expand_dims(cls_prod, axis=0)

    anchor = utils.gen_anchor((int(h / 16), int(w / 16)), 16)
    bbox = utils.bbox_transfor_inv(anchor, regr)
    bbox = utils.clip_box(bbox, [h, w])

    # keep proposals whose text score exceeds the threshold
    fg = np.where(cls_prod[0, :, 1] > self.threshold)[0]
    select_anchor = bbox[fg, :]
    select_score = cls_prod[0, fg, 1]
    select_anchor = select_anchor.astype('int32')

    # filter by size
    keep_index = utils.filter_bbox(select_anchor, 16)

    # nms
    select_anchor = select_anchor[keep_index]
    select_score = select_score[keep_index]
    select_score = np.reshape(select_score, (select_score.shape[0], 1))
    nmsbox = np.hstack((select_anchor, select_score))
    keep = utils.nms(nmsbox, 0.3)
    select_anchor = select_anchor[keep]
    select_score = select_score[keep]

    # text line
    textConn = text_connect.TextProposalConnector()
    text = textConn.get_text_lines(select_anchor, select_score, [h, w])
    end = time.time()
    text = list(text.astype('int32'))
    return text
def textline_extract(self, image):
    assert len(image.shape) == 3
    h, w, c = image.shape

    # zero-center by mean pixel
    m_img = image - utils.IMAGE_MEAN
    m_img = np.expand_dims(m_img, axis=0)

    start = time.time()
    result = self.basemodel.predict(m_img)
    cls, regr, cls_prod = result

    anchor = utils.gen_anchor((int(h / 16), int(w / 16)), 16)
    bbox = utils.bbox_transfor_inv(anchor, regr)
    bbox = utils.clip_box(bbox, [h, w])

    # keep proposals whose text score exceeds the threshold
    fg = np.where(cls_prod[0, :, 1] > self.threshold)[0]
    select_anchor = bbox[fg, :]
    select_score = cls_prod[0, fg, 1]
    select_anchor = select_anchor.astype('int32')

    # filter by size
    keep_index = utils.filter_bbox(select_anchor, 16)

    # nms
    select_anchor = select_anchor[keep_index]
    select_score = select_score[keep_index]
    select_score = np.reshape(select_score, (select_score.shape[0], 1))
    nmsbox = np.hstack((select_anchor, select_score))
    keep = utils.nms(nmsbox, 0.3)
    select_anchor = select_anchor[keep]
    select_score = select_score[keep]

    # text line
    textConn = text_connect.TextProposalConnector()
    text = textConn.get_text_lines(select_anchor, select_score, [h, w])
    end = time.time()
    text = list(text.astype('int32'))
    return text
def _detect(image):
    '''
    :param image: numpy array image of shape (h, w, c)
    :return: text line locations, an array of shape (#lines, 9) holding the
             x, y corner coordinates and the probability for each line
    '''
    h, w, c = image.shape
    image = image - config.IMAGE_MEAN
    image = np.expand_dims(image, axis=0)  # batch_sz, h, w, c
    _, regr, cls_prob = infer_model.predict(image)

    anchor = utils.gen_anchor((int(h / 16), int(w / 16)), 16)
    bbox = utils.bbox_transfor_inv(anchor, regr)
    bbox = utils.clip_box(bbox, [h, w])

    fg = np.where(cls_prob[0, :, 1] > 0.7)[0]
    select_anchor = bbox[fg, :]
    select_score = cls_prob[0, fg, 1]
    select_anchor = select_anchor.astype(np.int32)
    keep_index = utils.filter_bbox(select_anchor, 16)

    # nms
    select_anchor = select_anchor[keep_index]
    select_score = select_score[keep_index]
    select_score = np.reshape(select_score, (select_score.shape[0], 1))
    nmsbox = np.hstack((select_anchor, select_score))
    keep = utils.nms(nmsbox, 0.3)
    select_anchor = select_anchor[keep]
    select_score = select_score[keep]

    # text line
    textConn = TextProposalConnectorOriented()
    text = textConn.get_text_lines(select_anchor, select_score, [h, w])
    return text
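# Hedged sketch of consuming _detect's output: each row is assumed to hold eight corner
# coordinates followed by a probability, per the docstring. To avoid assuming a particular
# corner order, each line is drawn as the axis-aligned box enclosing its four corners.
# img_path is a placeholder.
import cv2
import numpy as np

img = cv2.imread(img_path)
for line in _detect(img):
    xs, ys = line[0:8:2], line[1:8:2]
    cv2.rectangle(img, (int(min(xs)), int(min(ys))), (int(max(xs)), int(max(ys))), (0, 255, 0), 2)
cv2.imwrite("detections.jpg", img)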
##################### detection ######################
image = cv2.imread(img_path)
image = utils.resize(image, width=width)
image_ori = image.copy()
h, w, c = image.shape
image = image - config.IMAGE_MEAN
image = np.expand_dims(image, axis=0)

infer_model = ctpn_model.create_ctpn_model()
infer_model.load_weights(ctpn_weight_path)
cls, regr, cls_prob = infer_model.predict(image)

anchor = utils.gen_anchor((int(h / 16), int(w / 16)), 16)
bbox = utils.bbox_transfor_inv(anchor, regr)
bbox = utils.clip_box(bbox, [h, w])

fg = np.where(cls_prob[0, :, 1] > 0.7)[0]
select_anchor = bbox[fg, :]
select_score = cls_prob[0, fg, 1]
select_anchor = select_anchor.astype(np.int32)
keep_index = utils.filter_bbox(select_anchor, 16)

# nms
select_anchor = select_anchor[keep_index]
select_score = select_score[keep_index]
select_score = np.reshape(select_score, (select_score.shape[0], 1))
nmsbox = np.hstack((select_anchor, select_score))
keep = utils.nms(nmsbox, 0.3)
select_anchor = select_anchor[keep]