def center_crop(image, size):
    """Cut a centered ``size`` x ``size`` window out of ``image``.

    Args:
        image: tensor whose axes 0 and 1 are treated as height and width.
            The slice below uses three indices, so a rank-3 tensor is
            expected (presumably HWC -- confirm with the caller).
        size: side length of the square crop.

    Returns:
        The cropped tensor; axis 2 is kept in full.
    """
    # Split the surplus evenly between both sides (floor division).
    top = (tf.shape(image)[0] - size) // 2
    left = (tf.shape(image)[1] - size) // 2
    # A size of -1 on the last axis means "everything from the offset on".
    return tf.slice(image, begin=[top, left, 0], size=[size, size, -1])
def validation_mapper(byte):
    """Decode one JPEG-encoded record and prepare it for validation.

    Steps: decode to 3 channels, resize through ``resize_shortest_edge``
    with target 256, center-crop to 224 x 224, then reverse axis 2.

    Args:
        byte: tensor holding a single encoded JPEG; it is reshaped to a
            scalar before decoding.

    Returns:
        The cropped image tensor with its last axis reversed.
    """
    encoded = tf.reshape(byte, shape=[])
    decoded = tf.image.decode_jpeg(encoded, 3, **JPEG_OPT)
    resized = resize_shortest_edge(decoded, tf.shape(decoded), 256)
    cropped = center_crop(resized, 224)
    # to BGR
    return tf.reverse(cropped, axis=[2])
def rpn(self, image, features, inputs):
    """Run the RPN on the first feature map and produce box proposals.

    Args:
        image: input tensor; ``tf.shape(image)[2:]`` is used as (h, w).
        features: sequence of feature maps; only ``features[0]`` is used.
        inputs: mapping that provides ``'anchor_labels'`` and
            ``'anchor_boxes'``.

    Returns:
        A pair ``(BoxProposals, losses)``. ``losses`` is the result of
        ``rpn_losses`` when ``self.training`` is true, otherwise ``[]``.
    """
    feature_map = features[0]
    label_logits, box_logits = rpn_head(
        'rpn', feature_map, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    all_anchors = get_all_anchors(
        stride=cfg.RPN.ANCHOR_STRIDE,
        sizes=cfg.RPN.ANCHOR_SIZES,
        ratios=cfg.RPN.ANCHOR_RATIOS,
        max_size=cfg.PREPROC.MAX_SIZE)
    anchors = RPNAnchors(
        all_anchors, inputs['anchor_labels'], inputs['anchor_boxes'])
    anchors = anchors.narrow_to(feature_map)

    image_shape2d = tf.shape(image)[2:]  # h,w
    decoded_boxes = anchors.decode_logits(box_logits)  # fHxfWxNAx4, floatbox

    # Training and testing use different top-k budgets around NMS.
    if self.training:
        pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK

    # The proposal scores are not needed by this method.
    proposal_boxes, _ = generate_rpn_proposals(
        tf.reshape(decoded_boxes, [-1, 4]),
        tf.reshape(label_logits, [-1]),
        image_shape2d,
        pre_nms_topk,
        post_nms_topk)

    if not self.training:
        return BoxProposals(proposal_boxes), []

    losses = rpn_losses(
        anchors.gt_labels, anchors.encoded_gt_boxes(),
        label_logits, box_logits)
    return BoxProposals(proposal_boxes), losses
def rpn(self, image, features, inputs):
    """Run the RPN head on every feature level and merge the proposals.

    Args:
        image: input tensor; ``tf.shape(image)[2:]`` is used as (h, w).
        features: list of per-level feature maps. It is handed to
            ``self.slice_feature_and_anchors`` before the heads run.
        inputs: mapping that provides ``'anchor_labels_lvl{k}'`` and
            ``'anchor_boxes_lvl{k}'`` for k = 2, 3, ... (one pair per
            anchor level).

    Returns:
        A pair ``(BoxProposals, losses)``. ``losses`` is the result of
        ``multilevel_rpn_losses`` when ``self.training`` is true,
        otherwise ``[]``.
    """
    assert len(cfg.RPN.ANCHOR_SIZES) == len(cfg.FPN.ANCHOR_STRIDES)

    image_shape2d = tf.shape(image)[2:]  # h,w
    all_anchors_fpn = get_all_anchors_fpn(
        strides=cfg.FPN.ANCHOR_STRIDES,
        sizes=cfg.RPN.ANCHOR_SIZES,
        ratios=cfg.RPN.ANCHOR_RATIOS,
        max_size=cfg.PREPROC.MAX_SIZE)

    # Input keys are numbered from level 2 upward.
    multilevel_anchors = [
        RPNAnchors(
            level_anchors,
            inputs['anchor_labels_lvl{}'.format(level)],
            inputs['anchor_boxes_lvl{}'.format(level)])
        for level, level_anchors in enumerate(all_anchors_fpn, start=2)
    ]
    self.slice_feature_and_anchors(features, multilevel_anchors)

    # Multi-Level RPN Proposals
    num_anchors = len(cfg.RPN.ANCHOR_RATIOS)
    multilevel_label_logits = []
    multilevel_box_logits = []
    for level_feature in features:
        head_out = rpn_head(
            'rpn', level_feature, cfg.FPN.NUM_CHANNEL, num_anchors)
        multilevel_label_logits.append(head_out[0])
        multilevel_box_logits.append(head_out[1])

    multilevel_pred_boxes = [
        level_anchors.decode_logits(level_logits)
        for level_anchors, level_logits in zip(
            multilevel_anchors, multilevel_box_logits)
    ]

    # The proposal scores are not needed by this method.
    proposal_boxes, _ = generate_fpn_proposals(
        multilevel_pred_boxes, multilevel_label_logits, image_shape2d)

    if not self.training:
        return BoxProposals(proposal_boxes), []

    losses = multilevel_rpn_losses(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits)
    return BoxProposals(proposal_boxes), losses
def sample(img, coords):
    """Gather pixels of ``img`` at the locations listed in ``coords``.

    Args:
        img: bxhxwxc
        coords: bxh2xw2x2. Each entry is a (y, x) location; values are
            clipped to the image bounds and then cast to int32. h2 and w2
            must be statically known.

    Return:
        bxh2xw2xc image
    """
    img_dims = img.get_shape().as_list()[1:]  # h, w, c
    batch_size = tf.shape(img)[0]
    out_dims = coords.get_shape().as_list()[1:3]  # h2, w2
    assert None not in out_dims, coords.get_shape()

    # Largest valid (y, x); clipping to it acts like borderMode==repeat.
    upper = tf.constant(
        [img_dims[0] - 1, img_dims[1] - 1], dtype=tf.float32)
    pixel_index = tf.cast(tf.clip_by_value(coords, 0., upper), tf.int32)

    # Prefix every (y, x) with the index of the batch item it belongs to.
    item_index = tf.reshape(
        tf.range(batch_size, dtype=tf.int32), [-1, 1, 1, 1])
    item_index = tf.tile(
        item_index, [1, out_dims[0], out_dims[1], 1])  # bxh2xw2x1
    gather_index = tf.concat([item_index, pixel_index], axis=3)  # bxh2xw2x3
    return tf.gather_nd(img, gather_index)
def GroupNorm(x, group, gamma_initializer=tf.constant_initializer(1.)):
    """
    Group Normalization, https://arxiv.org/abs/1803.08494

    More code that reproduces the paper can be found at
    https://github.com/ppwwyyxx/GroupNorm-reproduce/.

    Args:
        x: 4-D tensor with channels on axis 1. The channel count must be
            statically known and divisible by ``group``.
        group: number of groups the channels are split into.
        gamma_initializer: initializer for the ``gamma`` variable.

    Returns:
        A tensor reshaped back to the dynamic shape of ``x``.
    """
    static_shape = x.get_shape().as_list()
    assert len(static_shape) == 4, static_shape
    num_channels = static_shape[1]
    assert num_channels % group == 0, num_channels
    channels_per_group = num_channels // group

    # Height and width may be unknown statically, so read them at run time.
    dynamic_shape = tf.shape(x)
    grouped_shape = tf.stack(
        [-1, group, channels_per_group, dynamic_shape[2], dynamic_shape[3]])
    grouped = tf.reshape(x, grouped_shape)

    # Statistics are taken per (sample, group) over channels and space.
    mean, variance = tf.nn.moments(grouped, [2, 3, 4], keep_dims=True)

    # Per-channel parameters, shaped to broadcast over the grouped tensor.
    param_shape = [1, group, channels_per_group, 1, 1]
    beta = tf.reshape(
        tf.get_variable('beta', [num_channels],
                        initializer=tf.constant_initializer()),
        param_shape)
    gamma = tf.reshape(
        tf.get_variable('gamma', [num_channels],
                        initializer=gamma_initializer),
        param_shape)

    normalized = tf.nn.batch_normalization(
        grouped, mean, variance, beta, gamma, 1e-5, name='output')
    return tf.reshape(normalized, dynamic_shape, name='output')
def roi_heads(self, image, features, proposals, targets):
    """Build the box head (and optional mask head) on multi-level features.

    Args:
        image: input tensor; ``tf.shape(image)[2:]`` is used as (h, w).
        features: list of exactly five feature maps (P2..P6); RoI features
            are taken from the first four.
        proposals: object exposing ``.boxes``. In training it is replaced
            by the result of ``sample_fast_rcnn_targets``.
        targets: sequence starting with ``gt_boxes, gt_labels``;
            ``targets[2]`` is read as the ground-truth masks when
            ``cfg.MODE_MASK`` is set in training.

    Returns:
        In training, the losses from the box head, with the mask loss
        appended when ``cfg.MODE_MASK`` is set. In inference, ``[]``; the
        results are created as named graph ops instead.
    """
    image_shape2d = tf.shape(image)[2:]  # h,w
    assert len(features) == 5, "Features have to be P23456!"
    gt_boxes, gt_labels, *_ = targets

    if self.training:
        proposals = sample_fast_rcnn_targets(
            proposals.boxes, gt_boxes, gt_labels)

    # Only the first four levels feed the RoI heads.
    roi_levels = features[:4]
    fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)

    if cfg.FPN.CASCADE:
        def roi_func(boxes):
            return multilevel_roi_align(roi_levels, boxes, 7)

        fastrcnn_head = CascadeRCNNHead(
            proposals, roi_func, fastrcnn_head_func,
            (gt_boxes, gt_labels), image_shape2d, cfg.DATA.NUM_CATEGORY)
    else:
        box_roi_feature = multilevel_roi_align(
            roi_levels, proposals.boxes, 7)
        box_head_feature = fastrcnn_head_func('fastrcnn', box_roi_feature)
        label_logits, box_logits = fastrcnn_outputs(
            'fastrcnn/outputs', box_head_feature, cfg.DATA.NUM_CATEGORY)
        regression_weights = tf.constant(
            cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
        fastrcnn_head = FastRCNNHead(
            proposals, box_logits, label_logits, gt_boxes,
            regression_weights)

    if not self.training:
        decoded_boxes = fastrcnn_head.decoded_output_boxes()
        decoded_boxes = clip_boxes(
            decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(
            name='fastrcnn_all_scores')
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')

        if cfg.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            mask_roi_feature = multilevel_roi_align(
                roi_levels, final_boxes, 14)
            maskrcnn_head_func = getattr(
                model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', mask_roi_feature,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28
            # Pair each result row with its predicted label, shifted by one,
            # to pick a single mask channel per detection.
            row_ids = tf.range(tf.size(final_labels))
            class_ids = tf.cast(final_labels, tf.int32) - 1
            indices = tf.stack([row_ids, class_ids], axis=1)
            final_mask_logits = tf.gather_nd(
                mask_logits, indices)  # #resultx28x28
            tf.sigmoid(final_mask_logits, name='output/masks')
        return []

    all_losses = fastrcnn_head.losses()
    if cfg.MODE_MASK:
        gt_masks = targets[2]
        # maskrcnn loss
        mask_roi_feature = multilevel_roi_align(
            roi_levels, proposals.fg_boxes(), 14,
            name_scope='multilevel_roi_align_mask')
        maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
        mask_logits = maskrcnn_head_func(
            'maskrcnn', mask_roi_feature,
            cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28

        fg_mask_targets = crop_and_resize(
            tf.expand_dims(gt_masks, 1),
            proposals.fg_boxes(),
            proposals.fg_inds_wrt_gt, 28,
            pad_border=False)  # fg x 1x28x28
        fg_mask_targets = tf.squeeze(
            fg_mask_targets, 1, 'sampled_fg_mask_targets')
        mask_loss = maskrcnn_loss(
            mask_logits, proposals.fg_labels(), fg_mask_targets)
        all_losses.append(mask_loss)
    return all_losses
def roi_heads(self, image, features, proposals, targets):
    """Build the box head (and optional mask head) on a single feature map.

    Args:
        image: input tensor; ``tf.shape(image)[2:]`` is used as (h, w).
        features: sequence of feature maps; only ``features[0]`` is used.
        proposals: object exposing ``.boxes``. In training it is replaced
            by the result of ``sample_fast_rcnn_targets``.
        targets: sequence starting with ``gt_boxes, gt_labels``;
            ``targets[2]`` is read as the ground-truth masks when
            ``cfg.MODE_MASK`` is set in training.

    Returns:
        In training, the losses from the box head, with the mask loss
        appended when ``cfg.MODE_MASK`` is set. In inference, ``[]``; the
        results are created as named graph ops instead.
    """
    image_shape2d = tf.shape(image)[2:]  # h,w
    featuremap = features[0]
    gt_boxes, gt_labels, *_ = targets

    if self.training:
        # sample proposal boxes in training
        proposals = sample_fast_rcnn_targets(
            proposals.boxes, gt_boxes, gt_labels)

    # Factor that maps image-space boxes onto the feature map.
    stride_scale = 1.0 / cfg.RPN.ANCHOR_STRIDE
    conv5_blocks = cfg.BACKBONE.RESNET_NUM_BLOCKS[-1]

    # The boxes to be used to crop RoIs.
    # Use all proposal boxes in inference
    boxes_on_featuremap = proposals.boxes * stride_scale
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    feature_fastrcnn = resnet_conv5(roi_resized, conv5_blocks)  # nxcx7x7
    # Keep C5 feature to be shared with mask branch
    feature_gap = GlobalAvgPooling(
        'gap', feature_fastrcnn, data_format='channels_first')
    label_logits, box_logits = fastrcnn_outputs(
        'fastrcnn', feature_gap, cfg.DATA.NUM_CATEGORY)
    regression_weights = tf.constant(
        cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
    fastrcnn_head = FastRCNNHead(
        proposals, box_logits, label_logits, gt_boxes, regression_weights)

    if not self.training:
        decoded_boxes = fastrcnn_head.decoded_output_boxes()
        decoded_boxes = clip_boxes(
            decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(
            name='fastrcnn_all_scores')
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')

        if cfg.MODE_MASK:
            # Crop RoI features again, this time around the final boxes.
            roi_resized = roi_align(
                featuremap, final_boxes * stride_scale, 14)
            feature_maskrcnn = resnet_conv5(roi_resized, conv5_blocks)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn,
                cfg.DATA.NUM_CATEGORY, 0)  # #result x #cat x 14x14
            # Pair each result row with its predicted label, shifted by one,
            # to pick a single mask channel per detection.
            row_ids = tf.range(tf.size(final_labels))
            class_ids = tf.cast(final_labels, tf.int32) - 1
            indices = tf.stack([row_ids, class_ids], axis=1)
            final_mask_logits = tf.gather_nd(
                mask_logits, indices)  # #resultx14x14
            tf.sigmoid(final_mask_logits, name='output/masks')
        return []

    all_losses = fastrcnn_head.losses()
    if cfg.MODE_MASK:
        gt_masks = targets[2]
        # maskrcnn loss
        # In training, mask branch shares the same C5 feature.
        fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds())
        mask_logits = maskrcnn_upXconv_head(
            'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY,
            num_convs=0)  # #fg x #cat x 14x14

        fg_mask_targets = crop_and_resize(
            tf.expand_dims(gt_masks, 1),
            proposals.fg_boxes(),
            proposals.fg_inds_wrt_gt, 14,
            pad_border=False)  # nfg x 1x14x14
        fg_mask_targets = tf.squeeze(
            fg_mask_targets, 1, 'sampled_fg_mask_targets')
        mask_loss = maskrcnn_loss(
            mask_logits, proposals.fg_labels(), fg_mask_targets)
        all_losses.append(mask_loss)
    return all_losses