def call(self, inputs):
    """Run detection refinement on every image of the batch.

    Args:
        inputs: Sequence whose first four items are, in order, `rois`,
            `mrcnn_class`, `mrcnn_bbox` and `image_meta`.

    Returns:
        The refined detections reshaped to
        [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)]
        in normalized coordinates.
    """
    rois = inputs[0]
    mrcnn_class = inputs[1]
    mrcnn_bbox = inputs[2]
    image_meta = inputs[3]

    # A window is the area of the image that excludes the padding. Every
    # image in the batch is resized to the same size, so the shape of the
    # first image is used to normalize all windows.
    meta = parse_image_meta_graph(image_meta)
    first_image_shape = meta['image_shape'][0]
    window = norm_boxes_graph(meta['window'], first_image_shape[:2])

    def refine_one(image_rois, image_class, image_bbox, image_window):
        # Per-image refinement; the layer config is passed through untouched.
        return refine_detections_graph(
            image_rois, image_class, image_bbox, image_window, self.config)

    detections_batch = utils.batch_slice(
        [rois, mrcnn_class, mrcnn_bbox, window],
        refine_one,
        self.config.IMAGES_PER_GPU)

    output_shape = [
        self.config.BATCH_SIZE,
        self.config.DETECTION_MAX_INSTANCES,
        6,
    ]
    return tf.reshape(detections_batch, output_shape)
def generate_detect_target(proposals, gt_class_ids, gt_boxes, gt_masks, config):
    """Subsample proposals and build box, class and mask targets for each.

    The batch is sliced and `detect_target` is run on every slice through
    `utils.batch_slice`.

    Args:
        proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates.
            Might be zero padded if there are not enough proposals.
        gt_class_ids: [batch, MAX_GT_INSTANCES] integer class IDs.
        gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized
            coordinates.
        gt_masks: [batch, MAX_GT_INSTANCES, height, width] of boolean type.
        config: Configuration object; `IMAGES_PER_GPU` is read here and the
            object is forwarded to `utils.batch_slice` and `detect_target`.

    Returns:
        Target ROIs and corresponding class IDs, bounding box shifts and
        masks, as produced by `utils.batch_slice`:
            rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in
                normalized coordinates.
            target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE] integer class IDs.
            target_deltas: [batch, TRAIN_ROIS_PER_IMAGE,
                (dy, dx, log(dh), log(dw))].
            target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width], masks
                cropped to bbox boundaries and resized to the neural network
                output size.

        Returned arrays might be zero padded if there are not enough target
        ROIs.
    """
    output_names = ['rois', 'target_class_ids', 'target_deltas', 'target_mask']

    # The last parameter keeps the name `config` because batch_slice is
    # handed `config=config` and may forward it by keyword.
    def targets_for_slice(slice_proposals, slice_class_ids, slice_boxes,
                          slice_masks, config):
        return detect_target(
            slice_proposals, slice_class_ids, slice_boxes, slice_masks, config)

    batch_inputs = [proposals, gt_class_ids, gt_boxes, gt_masks]
    return utils.batch_slice(
        batch_inputs,
        targets_for_slice,
        config.IMAGES_PER_GPU,
        names=output_names,
        config=config)
def call(self, inputs):
    """Run detection refinement on every image of the batch.

    Args:
        inputs: Sequence whose first four items are, in order, `rois`,
            `mrcnn_class`, `mrcnn_bbox` and `image_meta`.

    Returns:
        The refined detections reshaped to
        [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)]
        in normalized coordinates.
    """
    rois = inputs[0]
    mrcnn_class = inputs[1]
    mrcnn_bbox = inputs[2]
    image_meta = inputs[3]

    # A window is the area of the image that excludes the padding. Every
    # image in the batch is resized to the same size, so the shape of the
    # first image is used to normalize all windows.
    meta = utils.parse_image_meta_graph(image_meta)
    first_image_shape = meta['image_shape'][0]
    window = utils.norm_boxes_graph(meta['window'], first_image_shape[:2])

    def refine_one(image_rois, image_class, image_bbox, image_window):
        # Per-image refinement driven by the thresholds stored on the layer.
        return refine_detections_graph(
            image_rois,
            image_class,
            image_bbox,
            image_window,
            self.bbox_std_dev,
            self.detection_min_confidence,
            self.detection_max_instance,
            self.detection_nms_threshold)

    detections_batch = utils.batch_slice(
        [rois, mrcnn_class, mrcnn_bbox, window],
        refine_one,
        self.count_image_per_gpu)

    output_shape = [self.size_batch, self.detection_max_instance, 6]
    return tf.reshape(detections_batch, output_shape)
def mrcnn_class_loss_graphV2(target_class_ids, pred_class_logits,
                             active_class_ids, batch_size=20):
    """Compute the classification loss restricted to active predicted classes.

    For every batch item the predicted class of each ROI (argmax over axis 2
    of the logits) is looked up in that item's `active_class_ids`; the
    resulting values weight the sparse softmax cross-entropy.

    Args:
        target_class_ids: Target class IDs; cast to int64 before use.
        pred_class_logits: Class logits; the class axis is axis 2.
        active_class_ids: Per-batch-item vector indexed by class ID.
        batch_size: Number of batch items to slice over.

    Returns:
        Scalar tensor: sum of the weighted losses divided by the sum of the
        weights.
    """
    labels = tf.cast(target_class_ids, 'int64')
    predicted_ids = tf.argmax(pred_class_logits, axis=2)

    def lookup_active(item_active_ids, item_predicted_ids):
        # Pick, for every ROI, the "active" flag of its predicted class.
        return tf.gather(item_active_ids, item_predicted_ids)

    active_weights = utils.batch_slice(
        [active_class_ids, predicted_ids], lookup_active, batch_size)

    per_roi_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=pred_class_logits)

    active_weights = tf.cast(active_weights, tf.float32)
    weighted_loss = per_roi_loss * active_weights
    return tf.reduce_sum(weighted_loss) / tf.reduce_sum(active_weights)
def detection_graph(config, mrcnn_rois, mrcnn_class_prob, mrcnn_deltas, image_meta):
    """Refine the detections of every image in an inference batch.

    Args:
        config: Configuration object; `BATCH_SIZE_INFERENCE` and
            `MAX_NUM_ROIS` are read here and the object is forwarded to
            `refine_detection_graph`.
        mrcnn_rois: Batched ROIs.
        mrcnn_class_prob: Batched class probabilities.
        mrcnn_deltas: Batched box deltas.
        image_meta: Batched image meta tensor, decoded with
            `utils.metas_converter`.

    Returns:
        Tensor reshaped to (BATCH_SIZE_INFERENCE, MAX_NUM_ROIS, 6).
    """
    meta = utils.metas_converter(image_meta)
    windows = utils.norm_boxes_tf(meta["window"], meta["molded_shape"][:2])

    def refine_one(rois, class_prob, deltas, window):
        return refine_detection_graph(rois, class_prob, deltas, window, config)

    # This project's batch_slice takes the batch size before the function.
    refined = utils.batch_slice(
        [mrcnn_rois, mrcnn_class_prob, mrcnn_deltas, windows],
        config.BATCH_SIZE_INFERENCE,
        refine_one)

    target_shape = (config.BATCH_SIZE_INFERENCE, config.MAX_NUM_ROIS, 6)
    return tf.reshape(refined, target_shape)
def call(self, inputs):
    """Apply `refine_detections` to each batch item.

    Args:
        inputs: Sequence whose first three items are the ROIs, the class
            probabilities and the box deltas.

    Returns:
        Whatever `utils.batch_slice` assembles from the per-item results.
    """
    rois, probs, deltas = inputs[0], inputs[1], inputs[2]

    def refine_one(item_rois, item_probs, item_deltas):
        return refine_detections(item_rois, item_probs, item_deltas, self.config)

    # NOTE: the number of slices is fixed at 20 rather than read from config.
    return utils.batch_slice([rois, probs, deltas], refine_one, 20)
def call(self, inputs):
    """Generate detection targets for every image in the batch.

    Args:
        inputs: Exactly four items: proposals, ground-truth class IDs,
            ground-truth boxes and ground-truth masks.

    Returns:
        The outputs of `utils.batch_slice`, named "rois",
        "target_class_ids", "target_bbox" and "target_mask".
    """
    proposals, gt_class_ids, gt_boxes, gt_masks = inputs
    output_names = ["rois", "target_class_ids", "target_bbox", "target_mask"]

    def targets_for_image(image_proposals, image_class_ids, image_boxes,
                          image_masks):
        return detection_targets_graph(
            image_proposals, image_class_ids, image_boxes, image_masks,
            self.config)

    return utils.batch_slice(
        [proposals, gt_class_ids, gt_boxes, gt_masks],
        targets_for_image,
        self.config.IMAGES_PER_GPU,
        names=output_names)
def call(self, inputs, **kwargs):
    """Generate box and class targets for every image in the batch.

    Args:
        inputs: Sequence whose first three items are the proposals, the
            ground-truth class IDs and the ground-truth boxes.
        **kwargs: Accepted but not used.

    Returns:
        The outputs of `utils.batch_slice`, named "rois",
        "target_class_ids" and "target_bbox".
    """
    proposals, gt_class_ids, gt_boxes = inputs[0], inputs[1], inputs[2]

    # TODO: Rename target_bbox to target_deltas for clarity
    output_names = ["rois", "target_class_ids", "target_bbox"]

    def targets_for_image(image_proposals, image_class_ids, image_boxes):
        return detection_targets_graph(
            image_proposals, image_class_ids, image_boxes)

    # The batch size comes from the module-level `config`, not from self.
    return utils.batch_slice(
        [proposals, gt_class_ids, gt_boxes],
        targets_for_image,
        config.BATCH_SIZE,
        names=output_names)
def proposal_graph(proposal_count, nms_threshold, config, batch_size, cls_prob, deltas, anchors):
    """Turn RPN outputs into a fixed number of proposals per image.

    Steps: scale the deltas, keep the top-scoring anchors, apply the deltas,
    clip the boxes to the unit window, then run non-max suppression and pad
    or trim to `proposal_count` rows.

    Args:
        proposal_count: Number of proposals returned per image.
        nms_threshold: Threshold handed to `utils.non_max_suppression`.
        config: Configuration object; `RPN_BBOX_STD_DEV` and
            `PRE_NMS_PROPOSALS_INFERENCE` are read.
        batch_size: Number of batch items to slice over.
        cls_prob: RPN class probabilities; index 1 on the last axis is used
            as the score.
        deltas: RPN box deltas, (num_batch, N, 4).
        anchors: Anchors shared by all batch items; gathered per image with
            the top-k indices.

    Returns:
        Tuple `(proposals, pre_nms_boxes)`: the per-image proposals after
        NMS and the clipped boxes that were fed into NMS.
    """
    # (num_batch, N, 4)
    std_dev = tf.reshape(
        tf.constant(config.RPN_BBOX_STD_DEV, tf.float32), (1, 1, 4))
    deltas = deltas * std_dev
    scores = cls_prob[:, :, 1]

    pre_nms_count = tf.minimum(
        config.PRE_NMS_PROPOSALS_INFERENCE, tf.shape(anchors)[0])
    indices = tf.nn.top_k(scores, k=pre_nms_count, sorted=True).indices

    # From here on only the entries selected by `indices` are kept.
    scores = utils.batch_slice((scores, indices), batch_size, tf.gather)
    deltas = utils.batch_slice((deltas, indices), batch_size, tf.gather)

    # Keep a separate handle on the shared anchors so the gather below does
    # not depend on `anchors` being rebound to the per-image result.
    base_anchors = anchors
    anchors = utils.batch_slice(
        (indices,), batch_size, lambda ix: tf.gather(base_anchors, ix))

    pre_nms_boxes = utils.batch_slice(
        (anchors, deltas), batch_size, utils.apply_deltas)

    windows = np.array([0, 0, 1, 1], dtype=np.float32)
    pre_nms_boxes = utils.batch_slice(
        (pre_nms_boxes,), batch_size, lambda b: utils.clip_box(b, windows))

    def nms(pre_nms_box, scores):
        """Run NMS on one image and return exactly `proposal_count` rows."""
        indices = utils.non_max_suppression(
            pre_nms_box, scores, proposal_count, nms_threshold,
            name="rpn_non_max_suppression")
        proposals = tf.gather(pre_nms_box, indices)
        # Zero-pad when NMS kept fewer boxes than requested ...
        num_pad = tf.maximum(proposal_count - tf.shape(proposals)[0], 0)
        proposals = tf.pad(proposals, [(0, num_pad), (0, 0)])
        # ... and keep only the first `proposal_count` rows otherwise.
        proposals = tf.gather(proposals, tf.range(proposal_count))
        return proposals

    proposals = utils.batch_slice((pre_nms_boxes, scores), batch_size, nms)
    return proposals, pre_nms_boxes
def call(self, inputs):
    """Select, refine, clip and suppress anchors to produce proposals.

    Args:
        inputs: Sequence whose first item holds the RPN class scores (index
            1 on the last axis is the foreground confidence) and whose
            second item holds the box deltas, [batch, num_rois, 4].

    Returns:
        Proposals in normalized coordinates, one block of
        `self.proposal_count` rows per image.
    """
    images_per_gpu = self.config.IMAGES_PER_GPU

    # Box scores: use the foreground class confidence.
    scores = inputs[0][:, :, 1]
    # Box deltas [batch, num_rois, 4], scaled by the configured std dev.
    deltas = inputs[1]
    deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4])
    base_anchors = self.anchors

    # Trim to the top anchors by score and do the rest on that smaller
    # subset to improve performance.
    pre_nms_limit = min(10000, self.anchors.shape[0])
    top_ix = tf.nn.top_k(
        scores, pre_nms_limit, sorted=True, name="top_anchors").indices

    def take(values, indices):
        return tf.gather(values, indices)

    scores = utils.batch_slice([scores, top_ix], take, images_per_gpu)
    deltas = utils.batch_slice([deltas, top_ix], take, images_per_gpu)
    anchors = utils.batch_slice(
        top_ix,
        lambda indices: tf.gather(base_anchors, indices),
        images_per_gpu,
        names=["pre_nms_anchors"])

    # Apply deltas to anchors to get refined anchors.
    # [batch, N, (y1, x1, y2, x2)]
    boxes = utils.batch_slice(
        [anchors, deltas],
        lambda image_anchors, image_deltas: apply_box_deltas_graph(
            image_anchors, image_deltas),
        images_per_gpu,
        names=["refined_anchors"])

    # Clip to image boundaries. [batch, N, (y1, x1, y2, x2)]
    height, width = self.config.IMAGE_SHAPE[:2]
    window = np.array([0, 0, height, width]).astype(np.float32)
    boxes = utils.batch_slice(
        boxes,
        lambda image_boxes: clip_boxes_graph(image_boxes, window),
        images_per_gpu,
        names=["refined_anchors_clipped"])

    # Convert from pixels to normalized coordinates.
    normalized_boxes = boxes / np.array([[height, width, height, width]])

    def nms(normalized_boxes, scores):
        """Non-max suppression for one image, zero padded to a fixed size."""
        keep = tf.image.non_max_suppression(
            normalized_boxes, scores, self.proposal_count,
            self.nms_threshold, name="rpn_non_max_suppression")
        kept_boxes = tf.gather(normalized_boxes, keep)
        # Pad if needed
        missing = self.proposal_count - tf.shape(kept_boxes)[0]
        return tf.concat([kept_boxes, tf.zeros([missing, 4])], 0)

    return utils.batch_slice([normalized_boxes, scores], nms, images_per_gpu)
def call(self, input):
    """Select, refine, clip and suppress anchors to produce proposals.

    Args:
        input: Sequence whose first item holds the RPN class probabilities
            (index 1 on the last axis is the foreground probability) and
            whose second item holds the box offsets.

    Returns:
        Proposals in normalized coordinates, zero padded per image up to
        `self.num_proposal` rows.
    """
    images_per_gpu = self.config.IMAGES_PER_GPU

    # Foreground probability of every anchor.
    class_probs = input[0][:, :, 1]
    bbox_offset = input[1]
    bbox_offset = bbox_offset * np.reshape(self.config.BBOX_STD_DEV, [1, 1, 4])
    base_anchors = self.anchors

    # Indices of the k largest probabilities.
    pre_nms_limit = min(self.config.PRE_NMS_LIMIT, self.anchors.shape[0])
    top_ids = tf.nn.top_k(
        class_probs, pre_nms_limit, sorted=True, name="top_anchors").indices

    def take(values, indices):
        return tf.gather(values, indices)

    # Slice per image (images per process) and keep the top entries only.
    class_probs = utils.batch_slice(
        [class_probs, top_ids], take, images_per_gpu)
    bbox_offset = utils.batch_slice(
        [bbox_offset, top_ids], take, images_per_gpu)
    anchors = utils.batch_slice(
        top_ids,
        lambda indices: tf.gather(base_anchors, indices),
        images_per_gpu,
        names=["pre_nms_anchors"])

    # Apply the offsets to the anchors to get boxes that fit the foreground
    # objects more closely.
    bboxes = utils.batch_slice(
        [anchors, bbox_offset],
        lambda image_anchors, image_offsets: utils.apply_bbox_offset(
            image_anchors, image_offsets),
        images_per_gpu,
        names=["refined_anchors"])

    # Clip to the image boundaries (the window is expressed in pixels).
    h, w = self.config.IMAGE_SHAPE[:2]
    window = np.array([0, 0, h, w], dtype=np.float32)
    bboxes = utils.batch_slice(
        bboxes,
        lambda image_boxes: utils.clip_boxes(image_boxes, window),
        images_per_gpu,
        names=["refined_anchors_clipped"])

    # Normalize, then generate the proposals with NMS.
    normalized_bboxes = bboxes / np.array([[h, w, h, w]])

    def nms(normalized_bboxes, scores):
        """Non-max suppression for one image, zero padded to a fixed size."""
        keep = tf.image.non_max_suppression(
            normalized_bboxes, scores, self.num_proposal,
            self.nms_threshold, name="rpn_non_max_suppression")
        kept_boxes = tf.gather(normalized_bboxes, keep)
        missing = tf.maximum(self.num_proposal - tf.shape(kept_boxes)[0], 0)
        return tf.pad(kept_boxes, [(0, missing), (0, 0)])

    return utils.batch_slice(
        [normalized_bboxes, class_probs], nms, images_per_gpu)
def call(self, inputs, **kwargs):
    """Filter boxes by score, then run NMS to get text proposals.

    Args:
        inputs: Sequence whose first item holds box coordinates,
            (batch_size, num_post_nms_rois=1000, 4), and whose second item
            holds the box scores, (batch_size, num_post_nms_rois=1000).
        **kwargs: Accepted but not used.

    Returns:
        List `[proposals, probs]`; proposals has shape
        (batch_size, proposal_count, 4). Both are zero padded per image.
    """
    boxes = inputs[0]
    scores = inputs[1]
    batch_size = boxes.shape[0]

    def filter_on_score(image_boxes, image_scores):
        """Keep boxes scoring above the threshold; zero pad to the old size."""
        total = tf.shape(image_boxes)[0]
        kept_ix = tf.where(image_scores > self.text_min_score)[:, 0]
        kept_boxes = tf.gather(image_boxes, kept_ix)
        kept_scores = tf.gather(image_scores, kept_ix)
        missing = total - tf.shape(kept_boxes)[0]
        padded_boxes = tf.pad(kept_boxes, [(0, missing), (0, 0)])
        padded_scores = tf.pad(kept_scores, [(0, missing)])
        return padded_boxes, padded_scores

    # After filtering, boxes and scores keep their per-image length thanks
    # to the zero padding.
    boxes, scores = utils.batch_slice(
        [boxes, scores], filter_on_score, batch_size)

    def nms(image_boxes, image_scores):
        """Non-max suppression for one image, zero padded to a fixed size."""
        keep = tf.image.non_max_suppression(
            image_boxes, image_scores, self.text_proposal_count,
            self.text_nms_threshold, name="text_non_max_suppression")
        kept_boxes = tf.gather(image_boxes, keep)
        kept_probs = tf.gather(image_scores, keep)
        # Pad if needed
        missing = tf.maximum(
            self.text_proposal_count - tf.shape(kept_boxes)[0], 0)
        kept_boxes = tf.pad(kept_boxes, [(0, missing), (0, 0)])
        kept_probs = tf.pad(kept_probs, [(0, missing)])
        return kept_boxes, kept_probs

    # (batch_size, proposal_count, 4)
    proposals, probs = utils.batch_slice([boxes, scores], nms, batch_size)

    # Multiple outputs must be returned as a list, not a tuple.
    return [proposals, probs]
def __call__(self, ipt):
    """Generate detection targets for every image in the batch.

    Args:
        ipt: Sequence whose first four items are the proposals, the
            ground-truth class IDs, the ground-truth boxes and the
            ground-truth masks.

    Returns:
        The outputs of `utils.batch_slice`, named "rois",
        "target_class_ids", "target_deltas" and "target_mask".
    """
    proposals, gt_class_ids = ipt[0], ipt[1]
    gt_boxes, gt_masks = ipt[2], ipt[3]
    output_names = ["rois", "target_class_ids", "target_deltas", "target_mask"]

    def targets_for_image(image_proposals, image_class_ids, image_boxes,
                          image_masks):
        return layer.target_detection(
            image_proposals, image_class_ids, image_boxes, image_masks)

    return utils.batch_slice(
        [proposals, gt_class_ids, gt_boxes, gt_masks],
        targets_for_image,
        self.image_per_gpu,
        names=output_names)
def call(self, inputs):
    """Apply `refine_detections` to each batch item.

    Args:
        inputs: Sequence whose first three items are the ROIs, the class
            probabilities and the box deltas.

    Returns:
        Whatever `utils.batch_slice` assembles from the per-item results;
        no reshape is applied.
    """
    rois, probs, deltas = inputs[0], inputs[1], inputs[2]

    def refine_one(item_rois, item_probs, item_deltas):
        return refine_detections(item_rois, item_probs, item_deltas)

    # NOTE: the number of slices is fixed at 20 rather than configurable.
    return utils.batch_slice([rois, probs, deltas], refine_one, 20)
def call(self, inputs, **kwargs):
    """Run detection refinement on every image of the batch.

    Args:
        inputs: Sequence whose first three items are the ROIs, the class
            scores and the box deltas.
        **kwargs: Accepted but not used.

    Returns:
        Tensor reshaped to
        [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)]
        in normalized coordinates.
    """
    rois, rcnn_class, rcnn_delta = inputs[0], inputs[1], inputs[2]

    def refine_one(image_rois, image_class, image_delta):
        return refine_detections_graph(image_rois, image_class, image_delta)

    # Sizes come from the module-level `config`, not from the layer.
    detections_batch = utils.batch_slice(
        [rois, rcnn_class, rcnn_delta], refine_one, config.BATCH_SIZE)

    output_shape = [config.BATCH_SIZE, config.DETECTION_MAX_INSTANCES, 6]
    return tf.reshape(detections_batch, output_shape)
def call(self, x, mask=None):
    """Pool every batch item's ROIs to a fixed spatial size.

    Args:
        x: Two-element sequence `[img, rois]`.
        mask: Accepted for API compatibility; not used.

    Returns:
        Tensor reshaped to
        (-1, num_rois, pool_size, pool_size, nb_channels).
    """
    assert(len(x) == 2)
    img = x[0]
    rois = x[1]

    def pool_one(item_img, item_rois):
        # Named parameters avoid shadowing the layer input `x`.
        return roi_pooling_onebacth(
            item_img, item_rois,
            self.num_rois, self.pool_size, self.nb_channels)

    # The batch size comes from the module-level `config`.
    out = utils.batch_slice(
        [img, rois], pool_one, batch_size=config.batch_size)
    out = K.reshape(
        out,
        (-1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
    return out
def call(self, inputs):
    """Select, refine, clip and suppress anchors to produce proposals.

    Args:
        inputs: Sequence whose first three items are the RPN class scores
            (index 1 on the last axis is used as the score), the box deltas
            and the anchors (batched; axis 1 indexes anchors).

    Returns:
        Proposals, zero padded per image up to `self.proposal_count` rows.
    """
    images_per_gpu = self.config.IMAGES_PER_GPU

    scores = inputs[0][:, :, 1]
    deltas = inputs[1]
    deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4])
    anchors = inputs[2]

    # Number of top-scoring anchors to keep before NMS. This used to be
    # stored under the wrong name, leaving `pre_nms_limit` undefined.
    pre_nms_limit = tf.minimum(
        self.config.PRE_NMS_LIMIT, tf.shape(anchors)[1])
    ix = tf.nn.top_k(
        scores, pre_nms_limit, sorted=True, name='top_anchors').indices

    scores = utils.batch_slice(
        [scores, ix], lambda x, y: tf.gather(x, y), images_per_gpu)
    deltas = utils.batch_slice(
        [deltas, ix], lambda x, y: tf.gather(x, y), images_per_gpu)
    # The batch size argument was missing from this call.
    pre_nms_anchors = utils.batch_slice(
        [anchors, ix], lambda a, x: tf.gather(a, x), images_per_gpu)

    boxes = utils.batch_slice(
        [pre_nms_anchors, deltas],
        lambda x, y: apply_box_deltas_graph(x, y),
        images_per_gpu,
        names=['refined_anchors'])

    # Clip the refined boxes to the window. The window was defined here but
    # never applied; the clipping is written out with plain TensorFlow ops
    # so no additional helper is required.
    window = np.array([0, 0, 1, 1], dtype=np.float32)

    def clip_to_window(image_boxes):
        wy1, wx1, wy2, wx2 = tf.split(window, 4)
        y1, x1, y2, x2 = tf.split(image_boxes, 4, axis=1)
        y1 = tf.maximum(tf.minimum(y1, wy2), wy1)
        x1 = tf.maximum(tf.minimum(x1, wx2), wx1)
        y2 = tf.maximum(tf.minimum(y2, wy2), wy1)
        x2 = tf.maximum(tf.minimum(x2, wx2), wx1)
        return tf.concat([y1, x1, y2, x2], axis=1)

    boxes = utils.batch_slice(
        [boxes], clip_to_window, images_per_gpu,
        names=['refined_anchors_clipped'])

    def nms(boxes, scores):
        """Non-max suppression for one image, zero padded to a fixed size."""
        indices = tf.image.non_max_suppression(
            boxes, scores, self.proposal_count, self.nms_threshold,
            name='rpn_non_max_suppresion')
        proposals = tf.gather(boxes, indices)
        # Pad with zero rows when NMS kept fewer than proposal_count boxes.
        padding = tf.maximum(
            self.proposal_count - tf.shape(proposals)[0], 0)
        proposals = tf.pad(proposals, [(0, padding), (0, 0)])
        return proposals

    proposals = utils.batch_slice([boxes, scores], nms, images_per_gpu)
    return proposals
def call(self, inputs):
    """Generate detection targets for every image in the batch.

    Args:
        inputs: Sequence whose first four items are the proposals, the
            ground-truth class IDs, the ground-truth boxes and the
            ground-truth masks.

    Returns:
        The outputs of `utils.batch_slice`, named "rois",
        "target_class_ids", "target_bbox" and "target_mask".
    """
    proposals, gt_class_ids = inputs[0], inputs[1]
    gt_boxes, gt_masks = inputs[2], inputs[3]

    # TODO: Rename target_bbox to target_deltas for clarity
    output_names = ["rois", "target_class_ids", "target_bbox", "target_mask"]

    def targets_for_image(image_proposals, image_class_ids, image_boxes,
                          image_masks):
        return detection_targets_graph(
            image_proposals, image_class_ids, image_boxes, image_masks,
            self.config)

    # Slice the batch and run the target graph on each slice.
    return utils.batch_slice(
        [proposals, gt_class_ids, gt_boxes, gt_masks],
        targets_for_image,
        self.config.IMAGES_PER_GPU,
        names=output_names)
def get_detect_results(rois, mrcnn_class, mrcnn_bbox, mrcnn_mask, gts, config):
    """Refine detections and masks for every image in the batch.

    Args:
        rois: Batched ROIs; its leading dimension sets the output batch size.
        mrcnn_class: Batched class outputs.
        mrcnn_bbox: Batched box outputs.
        mrcnn_mask: Batched mask outputs.
        gts: Batched ground-truth input forwarded to the refinement graph.
        config: Configuration object; `IMAGES_PER_GPU`,
            `DETECTION_MAX_INSTANCES`, `MASK_SHAPE` and `NUM_CLASSES` are
            read here.

    Returns:
        Tuple `(detections_batch, mask_batch)`:
            detections_batch: [batch, DETECTION_MAX_INSTANCES,
                (y1, x1, y2, x2, class_id, score)] with normalized
                coordinates.
            mask_batch: [batch, DETECTION_MAX_INSTANCES, MASK_H, MASK_W,
                CLASS_NUM].
    """
    # The last parameter keeps the name `config` because batch_slice is
    # handed `config=config` and may forward it by keyword.
    def refine_one(image_rois, image_class, image_bbox, image_mask,
                   image_gts, config):
        return refine_detections_graph(
            image_rois, image_class, image_bbox, image_mask, image_gts,
            config)

    detections_batch, mask_batch = utils.batch_slice(
        [rois, mrcnn_class, mrcnn_bbox, mrcnn_mask, gts],
        refine_one,
        config.IMAGES_PER_GPU,
        config=config)

    detections_shape = [tf.shape(rois)[0], config.DETECTION_MAX_INSTANCES, 6]
    detections_batch = tf.reshape(detections_batch, detections_shape)

    masks_shape = [
        tf.shape(rois)[0],
        config.DETECTION_MAX_INSTANCES,
        config.MASK_SHAPE[0],
        config.MASK_SHAPE[1],
        config.NUM_CLASSES,
    ]
    mask_batch = tf.reshape(mask_batch, masks_shape)

    return detections_batch, mask_batch
def call(self, input):
    """Run detection refinement on every image of the batch.

    Args:
        input: Sequence whose first four items are the ROIs, the class
            outputs, the box outputs and the image meta tensor.

    Returns:
        Tensor reshaped to
        [BATCH_SIZE, DETECTION_MAX_INSTANCES,
        (y1, x1, y2, x2, class_id, score)].
    """
    rois, rcnn_class = input[0], input[1]
    rcnn_bbox, image_meta = input[2], input[3]

    # The window is taken straight from the parsed meta; it is not
    # normalized in this method.
    parsed_meta = utils.parse_image_meta(image_meta)
    window = parsed_meta['window']

    def refine_one(image_rois, image_class, image_bbox, image_window):
        return refine_detections(
            image_rois, image_class, image_bbox, image_window, self.config)

    detections = utils.batch_slice(
        [rois, rcnn_class, rcnn_bbox, window],
        refine_one,
        self.config.IMAGES_PER_GPU)

    output_shape = [
        self.config.BATCH_SIZE,
        self.config.DETECTION_MAX_INSTANCES,
        6,
    ]
    return tf.reshape(detections, output_shape)
def call(self, inputs):
    """Select, refine, clip and suppress anchors to produce proposals.

    Args:
        inputs: Sequence whose first three items are the RPN class scores
            (index 1 on the last axis is the foreground confidence), the
            box deltas [batch, num_rois, 4] and the batched anchors.

    Returns:
        Proposals, zero padded per image up to `self.proposal_count` rows.
    """
    per_gpu = self.count_image_per_gpu

    # Box scores: use the foreground class confidence.
    scores = inputs[0][:, :, 1]
    # Box deltas [batch, num_rois, 4], scaled by the configured std dev.
    deltas = inputs[1]
    deltas = deltas * np.reshape(self.rpn_bbox_std_dev, [1, 1, 4])
    anchors = inputs[2]

    # Trim to the top anchors by score and do the rest on that smaller
    # subset to improve performance.
    pre_nms_limit = tf.minimum(self.pre_nms_limit, tf.shape(anchors)[1])
    top_ix = tf.nn.top_k(
        scores, pre_nms_limit, sorted=True, name="top_anchors").indices

    def take(values, indices):
        return tf.gather(values, indices)

    scores = utils.batch_slice([scores, top_ix], take, per_gpu)
    deltas = utils.batch_slice([deltas, top_ix], take, per_gpu)
    pre_nms_anchors = utils.batch_slice(
        [anchors, top_ix], take, per_gpu, names=["pre_nms_anchors"])

    # Apply deltas to anchors to get refined anchors.
    # [batch, N, (y1, x1, y2, x2)]
    boxes = utils.batch_slice(
        [pre_nms_anchors, deltas],
        lambda image_anchors, image_deltas: utils.apply_box_deltas_graph(
            image_anchors, image_deltas),
        per_gpu,
        names=["refined_anchors"])

    # Clip to image boundaries. Coordinates are normalized, so the window
    # is the 0..1 range. [batch, N, (y1, x1, y2, x2)]
    window = np.array([0, 0, 1, 1], dtype=np.float32)
    boxes = utils.batch_slice(
        boxes,
        lambda image_boxes: utils.clip_boxes_graph(image_boxes, window),
        per_gpu,
        names=["refined_anchors_clipped"])

    # Small boxes are deliberately not filtered out: according to Xinlei
    # Chen's paper this reduces detection accuracy for small objects.

    def nms(boxes, scores):
        """Non-max suppression for one image, zero padded to a fixed size."""
        keep = tf.image.non_max_suppression(
            boxes, scores, self.proposal_count, self.nms_threshold,
            name="rpn_non_max_suppression")
        kept_boxes = tf.gather(boxes, keep)
        # Pad if needed
        missing = tf.maximum(
            self.proposal_count - tf.shape(kept_boxes)[0], 0)
        return tf.pad(kept_boxes, [(0, missing), (0, 0)])

    return utils.batch_slice([boxes, scores], nms, per_gpu)
def __call__(self, ipt):
    """Select, refine, clip and suppress anchors to produce proposal ROIs.

    Args:
        ipt: Sequence whose first three items are the RPN class scores
            (index 1 on the last axis is used as the score), the box deltas
            and the batched anchors.

    Returns:
        Proposal ROIs, zero padded per image up to
        `self.post_nms_rois_limit` rows.
    """
    per_gpu = self.image_per_gpu
    scores = ipt[0][:, :, 1]
    deltas = ipt[1]
    anchors = ipt[2]

    # Keep only the best-scoring anchors before doing any further work.
    pre_nms_limit = tf.minimum(self.pre_nms_limit, tf.shape(anchors)[1])
    top_ix = tf.nn.top_k(scores, pre_nms_limit, name="top_anchors").indices

    def take(values, indices):
        return tf.gather(values, indices)

    kept_scores = utils.batch_slice([scores, top_ix], take, per_gpu)
    kept_anchors = utils.batch_slice(
        [anchors, top_ix], take, per_gpu, names=["pre_nms_anchors"])
    kept_deltas = utils.batch_slice([deltas, top_ix], take, per_gpu)

    # Apply the deltas to the selected anchors.
    kept_boxes = utils.batch_slice(
        [kept_anchors, kept_deltas],
        lambda image_anchors, image_deltas: layer.apply_box_deltas_graph(
            image_anchors, image_deltas),
        per_gpu,
        names=["refined_anchors"])

    # Clip the refined boxes to the [0, 0, 1, 1] window.
    window = np.array([0, 0, 1, 1], dtype=np.float32)
    kept_boxes = utils.batch_slice(
        kept_boxes,
        lambda image_boxes: layer.clip_boxes_graph(image_boxes, window),
        per_gpu,
        names=["refined_anchors_clipped"])

    def nms(boxes, scores_):
        """Non-max suppression for one image, zero padded to a fixed size."""
        keep = tf.image.non_max_suppression(
            boxes, scores_, self.post_nms_rois_limit,
            self.rpn_nms_threshold, name="rpn_non_max_suppression")
        survivors = tf.gather(boxes, keep)
        missing = tf.maximum(
            self.post_nms_rois_limit - tf.shape(survivors)[0], 0)
        return tf.pad(survivors, [(0, missing), (0, 0)])

    return utils.batch_slice([kept_boxes, kept_scores], nms, per_gpu)
def __call__(self, ipt):
    """Run detection refinement on every image of the batch.

    Args:
        ipt: Sequence whose first four items are `rois`, `mrcnn_class`,
            `mrcnn_bbox` and `image_meta`.

    Returns:
        Tensor reshaped to
        [image_per_gpu, detection_max_instances, 6].
    """
    rois, mrcnn_class = ipt[0], ipt[1]
    mrcnn_bbox, image_meta = ipt[2], ipt[3]

    # Normalize the windows with the shape of the first image in the batch.
    meta = utils.parse_image_meta_graph(image_meta)
    first_image_shape = meta['image_shape'][0]
    window = utils.norm_boxes(meta['window'], first_image_shape[:2])

    def refine_one(image_rois, image_class, image_bbox, image_window):
        return layer.refine_detections(
            image_rois, image_class, image_bbox, image_window)

    detections_batch = utils.batch_slice(
        [rois, mrcnn_class, mrcnn_bbox, window],
        refine_one,
        self.image_per_gpu)

    output_shape = [self.image_per_gpu, self.detection_max_instances, 6]
    return tf.reshape(detections_batch, output_shape)
def call(self, inputs):
    """Run detection refinement on every image of the batch.

    Args:
        inputs: Sequence whose first four items are `rois`, `mrcnn_class`,
            `mrcnn_bbox` and `image_meta`.

    Returns:
        Tensor reshaped to
        [BATCH_SIZE, DETECTION_MAX_INSTANCES, 6].
    """
    rois, mrcnn_class = inputs[0], inputs[1]
    mrcnn_bbox, image_meta = inputs[2], inputs[3]

    # Only the third of the four parsed meta fields, the window, is needed.
    parsed_meta = parse_image_meta_graph(image_meta)
    _, _, window, _ = parsed_meta

    def refine_one(image_rois, image_class, image_bbox, image_window):
        return refine_detection_graph(
            image_rois, image_class, image_bbox, image_window, self.config)

    detections_batch = utils.batch_slice(
        [rois, mrcnn_class, mrcnn_bbox, window],
        refine_one,
        self.config.IMAGES_PER_GPU)

    # NOTE(review): the original author was unsure whether batch_slice
    # yields (batch_size * max_instances, 6) here; the reshape below fixes
    # the layout either way.
    output_shape = [
        self.config.BATCH_SIZE,
        self.config.DETECTION_MAX_INSTANCES,
        6,
    ]
    return tf.reshape(detections_batch, output_shape)
def call(self, input):
    """Generate box and class targets for every image in the batch.

    Mask targets are not produced by this variant.

    Args:
        input: Sequence whose first three items are the ROIs, the
            ground-truth class IDs and the ground-truth boxes.

    Returns:
        The outputs of `utils.batch_slice`, named "rois",
        "target_class_ids" and "target_bbox".
    """
    rois, gt_ids, gt_boxes = input[0], input[1], input[2]
    output_names = ["rois", "target_class_ids", "target_bbox"]

    def targets_for_image(image_rois, image_ids, image_boxes):
        return detection_graph(
            image_rois, image_ids, image_boxes, self.config)

    return utils.batch_slice(
        [rois, gt_ids, gt_boxes],
        targets_for_image,
        self.config.IMAGES_PER_GPU,
        names=output_names)
def call(self, inputs):
    """Run detection refinement on every image of the batch.

    Args:
        inputs: Sequence whose first four items are `rois`, `mrcnn_class`,
            `mrcnn_bbox` and `image_meta`.

    Returns:
        Tensor reshaped to [BATCH_SIZE, DETECTION_MAX_INSTANCES, 6].
        NOTE(review): the original comment described the last axis as
        (y1, x1, y2, x2, class_score) in pixels, which lists five values
        for six columns - confirm the actual layout against
        `refine_detections_graph`.
    """
    rois, mrcnn_class = inputs[0], inputs[1]
    mrcnn_bbox, image_meta = inputs[2], inputs[3]

    # Only the third of the four parsed meta fields, the window, is needed.
    parsed_meta = dg.parse_image_meta_graph(image_meta)
    _, _, window, _ = parsed_meta

    def refine_one(image_rois, image_class, image_bbox, image_window):
        return refine_detections_graph(
            image_rois, image_class, image_bbox, image_window, self.config)

    # Run detection refinement graph on each item in the batch.
    detections_batch = utils.batch_slice(
        [rois, mrcnn_class, mrcnn_bbox, window],
        refine_one,
        self.config.IMAGES_PER_GPU)

    output_shape = [
        self.config.BATCH_SIZE,
        self.config.DETECTION_MAX_INSTANCES,
        6,
    ]
    return tf.reshape(detections_batch, output_shape)
def generate_proposal(rpn_prob, rpn_bbox, anchors, proposal_count, config):
    """Select, refine, clip and suppress anchors to produce proposals.

    Args:
        rpn_prob: RPN class probabilities; index 1 on the last axis is used
            as the box score.
        rpn_bbox: RPN box deltas, [batch, num_rois, 4].
        anchors: Batched anchors; axis 1 indexes anchors.
        proposal_count: Number of proposals returned per image.
        config: Configuration object; `RPN_NMS_THRESHOLD`,
            `RPN_BBOX_STD_DEV`, `PRE_NMS_LIMIT` and `IMAGES_PER_GPU` are
            read.

    Returns:
        Tensor reshaped to (-1, proposal_count, 4).
    """
    per_gpu = config.IMAGES_PER_GPU
    nms_thresh = config.RPN_NMS_THRESHOLD

    # Box scores and scaled box deltas [batch, num_rois, 4].
    scores = rpn_prob[:, :, 1]
    deltas = rpn_bbox * np.reshape(config.RPN_BBOX_STD_DEV, [1, 1, 4])

    # Trim to the top anchors by score and do the rest on that smaller
    # subset to improve performance.
    pre_nms_limit = tf.minimum(config.PRE_NMS_LIMIT, tf.shape(anchors)[1])
    top_ix = tf.nn.top_k(
        scores, pre_nms_limit, sorted=True, name="top_anchors").indices

    def take(values, indices):
        return tf.gather(values, indices)

    scores = utils.batch_slice([scores, top_ix], take, per_gpu)
    deltas = utils.batch_slice([deltas, top_ix], take, per_gpu)
    pre_nms_anchors = utils.batch_slice([anchors, top_ix], take, per_gpu)

    # Apply deltas to anchors to get refined anchors.
    # [batch, N, (y1, x1, y2, x2)]
    boxes = utils.batch_slice(
        [pre_nms_anchors, deltas],
        lambda image_anchors, image_deltas: utils.apply_box_deltas_graph(
            image_anchors, image_deltas),
        per_gpu,
        names=["refined_anchors"])

    # Clip to image boundaries. Coordinates are normalized, so the window
    # is the 0..1 range. [batch, N, (y1, x1, y2, x2)]
    window = np.array([0, 0, 1, 1], dtype=np.float32)
    boxes = utils.batch_slice(
        boxes,
        lambda image_boxes: utils.clip_boxes_graph(image_boxes, window),
        per_gpu,
        names=["refined_anchors_clipped"])

    # Small boxes are deliberately not filtered out: according to Xinlei
    # Chen's paper this reduces detection accuracy for small objects.

    def nms(boxes, scores):
        """Non-max suppression for one image, zero padded to a fixed size."""
        keep = tf.image.non_max_suppression(
            boxes, scores, proposal_count, nms_thresh,
            name='rpn_non_max_suppression')
        kept_boxes = tf.gather(boxes, keep)
        # Pad if needed
        missing = tf.maximum(proposal_count - tf.shape(kept_boxes)[0], 0)
        return tf.pad(kept_boxes, [(0, missing), (0, 0)])

    proposals = utils.batch_slice([boxes, scores], nms, per_gpu)
    return tf.reshape(proposals, (-1, proposal_count, 4))
def crop_graph_Batches(feature_map, boxes, batch_size, num_rois):
    """Crop `feature_map` for the boxes of every batch item.

    Args:
        feature_map: Feature map handed unchanged to `crop_graph_oneBatch`
            for every batch item.
        boxes: Batched boxes, sliced along the batch dimension.
        batch_size: Number of batch items to slice over.
        num_rois: Forwarded as the third argument of `crop_graph_oneBatch`.

    Returns:
        Whatever `utils.batch_slice` assembles from the per-item crops.
    """
    def crop_item(item_boxes):
        return crop_graph_oneBatch(feature_map, item_boxes, num_rois)

    return utils.batch_slice(boxes, crop_item, batch_size=batch_size)
def crop_graph_oneBatch(feature_map, boxes, batch_size):
    """Crop `feature_map` once per entry of `boxes` and drop axis 1.

    Args:
        feature_map: Feature map handed unchanged to `crop_graph`.
        boxes: Boxes sliced along their first dimension.
        batch_size: Number of slices `utils.batch_slice` iterates over.

    Returns:
        The stacked crops with axis 1 squeezed out.
    """
    def crop_single(single_box):
        return crop_graph(feature_map, single_box)

    stacked_crops = utils.batch_slice(
        boxes, crop_single, batch_size=batch_size)
    return K.squeeze(stacked_crops, 1)
def call(self, inputs, **kwargs):
    """Select, refine, clip and suppress anchors; return boxes and scores.

    Args:
        inputs: Sequence whose first three items are the RPN class scores
            (batch_size, num_anchors, 2 or more), the box deltas
            (batch_size, num_anchors, 4) and the normalized anchors.
        **kwargs: Accepted but not used.

    Returns:
        List `[proposals, probs]`; proposals has shape
        (batch_size, proposal_count, 4). Both are zero padded per image.
    """
    # Training uses the configured batch size; any other mode runs on a
    # single image.
    batch_size = config.BATCH_SIZE if self.mode == 'training' else 1

    # Box scores: index 1 on the last axis is the foreground class
    # confidence. Shape (batch_size, num_anchors).
    scores = inputs[0][:, :, 1]
    # Box deltas (batch_size, num_anchors, 4), scaled by the std dev.
    deltas = inputs[1]
    deltas = deltas * np.reshape(config.RPN_BBOX_STD_DEV, [1, 1, 4])
    # Normalized anchors
    anchors = inputs[2]

    # Trim to the top anchors by score and do the rest on that smaller
    # subset to improve performance.
    pre_nms_limit = tf.minimum(config.PRE_NMS_LIMIT, tf.shape(anchors)[1])
    # For a multi-dimensional input, top_k sorts along the last axis; the
    # returned indices keep the shape of `scores` except that the last axis
    # has length k, so `top_ix` is (batch_size, pre_nms_limit).
    top_ix = tf.nn.top_k(
        scores, pre_nms_limit, sorted=True, name="top_anchors").indices

    def take(values, indices):
        return tf.gather(values, indices)

    # scores becomes (batch_size, pre_nms_limit)
    scores = utils.batch_slice([scores, top_ix], take, batch_size)
    # (batch_size, pre_nms_limit, 4)
    deltas = utils.batch_slice([deltas, top_ix], take, batch_size)
    # (batch_size, pre_nms_limit, 4)
    pre_nms_anchors = utils.batch_slice(
        [anchors, top_ix], take, batch_size, names=["pre_nms_anchors"])

    # Apply deltas to anchors to get refined anchors.
    # [batch, N, (y1, x1, y2, x2)]
    boxes = utils.batch_slice(
        [pre_nms_anchors, deltas],
        lambda image_anchors, image_deltas: apply_box_deltas_graph(
            image_anchors, image_deltas),
        batch_size,
        names=["refined_anchors"])

    # Clip to image boundaries. Coordinates are normalized, so the window
    # is the 0..1 range. [batch, N, (y1, x1, y2, x2)]
    window = np.array([0, 0, 1, 1], dtype=np.float32)
    boxes = utils.batch_slice(
        boxes,
        lambda image_boxes: clip_boxes_graph(image_boxes, window),
        batch_size,
        names=["refined_anchors_clipped"])

    # Small boxes are deliberately not filtered out: according to Xinlei
    # Chen's paper this reduces detection accuracy for small objects.

    def nms(boxes, scores):
        """Non-max suppression for one image, zero padded to a fixed size."""
        keep = tf.image.non_max_suppression(
            boxes, scores, self.proposal_count, self.nms_threshold,
            name="rpn_non_max_suppression")
        kept_boxes = tf.gather(boxes, keep)
        kept_probs = tf.gather(scores, keep)
        # Pad if needed
        missing = tf.maximum(
            self.proposal_count - tf.shape(kept_boxes)[0], 0)
        kept_boxes = tf.pad(kept_boxes, [(0, missing), (0, 0)])
        kept_probs = tf.pad(kept_probs, [(0, missing)])
        return kept_boxes, kept_probs

    # (batch_size, proposal_count, 4)
    proposals, probs = utils.batch_slice([boxes, scores], nms, batch_size)

    # Multiple outputs must be returned as a list, not a tuple.
    return [proposals, probs]