Beispiel #1
0
    def center_crop(image, size):
        """Cut a ``size`` x ``size`` window out of the middle of ``image``.

        Args:
            image: tensor whose axes 0 and 1 are sliced as height and width
                while axis 2 is kept whole.
            size: side length of the square window.

        Returns:
            The sliced tensor.
        """
        image_h, image_w = tf.shape(image)[0], tf.shape(image)[1]

        # Split the leftover border evenly; with floor division any odd
        # extra row/column ends up on the bottom/right side.
        top = (image_h - size) // 2
        left = (image_w - size) // 2

        # -1 in the last slot keeps every element of axis 2.
        return tf.slice(image, begin=[top, left, 0], size=[size, size, -1])
Beispiel #2
0
 def validation_mapper(byte):
     """Turn one encoded JPEG record into a cropped, channel-reversed image.

     Args:
         byte: tensor holding the encoded JPEG; it is reshaped to a scalar
             before being decoded with 3 channels.

     Returns:
         The decoded image after ``resize_shortest_edge(..., 256)``,
         ``center_crop(..., 224)`` and a reversal along axis 2.
     """
     encoded = tf.reshape(byte, shape=[])
     decoded = tf.image.decode_jpeg(encoded, 3, **JPEG_OPT)
     resized = resize_shortest_edge(decoded, tf.shape(decoded), 256)
     cropped = center_crop(resized, 224)
     # Flip the channel axis: to BGR.
     return tf.reverse(cropped, axis=[2])
Beispiel #3
0
    def rpn(self, image, features, inputs):
        """Build the single-level RPN: head, anchors, proposals and losses.

        Args:
            image: input image tensor; ``tf.shape(image)[2:]`` is used as the
                2D image shape (h, w).
            features: sequence of backbone feature maps; only ``features[0]``
                is used here.
            inputs: mapping that provides ``'anchor_labels'`` and
                ``'anchor_boxes'``.

        Returns:
            A pair ``(BoxProposals, losses)``. ``losses`` is the result of
            ``rpn_losses`` when ``self.training`` is true, otherwise an
            empty list.
        """
        training = self.training
        featuremap = features[0]
        label_logits, box_logits = rpn_head(
            'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

        all_anchors = get_all_anchors(
            stride=cfg.RPN.ANCHOR_STRIDE,
            sizes=cfg.RPN.ANCHOR_SIZES,
            ratios=cfg.RPN.ANCHOR_RATIOS,
            max_size=cfg.PREPROC.MAX_SIZE)
        anchors = RPNAnchors(
            all_anchors, inputs['anchor_labels'], inputs['anchor_boxes'])
        anchors = anchors.narrow_to(featuremap)

        image_shape2d = tf.shape(image)[2:]  # h,w
        decoded_boxes = anchors.decode_logits(box_logits)  # fHxfWxNAx4, floatbox

        # Flatten to one row per anchor before proposal generation.
        flat_boxes = tf.reshape(decoded_boxes, [-1, 4])
        flat_scores = tf.reshape(label_logits, [-1])

        # Training and inference use different top-k budgets around NMS.
        if training:
            pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
        else:
            pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK

        proposal_boxes, _proposal_scores = generate_rpn_proposals(
            flat_boxes, flat_scores, image_shape2d,
            pre_nms_topk, post_nms_topk)

        losses = []
        if training:
            losses = rpn_losses(
                anchors.gt_labels, anchors.encoded_gt_boxes(),
                label_logits, box_logits)

        return BoxProposals(proposal_boxes), losses
Beispiel #4
0
    def rpn(self, image, features, inputs):
        """Build the multi-level (FPN) RPN: heads, proposals and losses.

        Args:
            image: input image tensor; ``tf.shape(image)[2:]`` is used as the
                2D image shape (h, w).
            features: per-level feature maps. They are handed to
                ``self.slice_feature_and_anchors`` together with the anchors
                before the RPN head is applied to each of them.
            inputs: mapping that provides ``'anchor_labels_lvl{N}'`` and
                ``'anchor_boxes_lvl{N}'`` for every level, numbered from 2.

        Returns:
            A pair ``(BoxProposals, losses)``. ``losses`` is the result of
            ``multilevel_rpn_losses`` when ``self.training`` is true,
            otherwise an empty list.
        """
        assert len(cfg.RPN.ANCHOR_SIZES) == len(cfg.FPN.ANCHOR_STRIDES)

        image_shape2d = tf.shape(image)[2:]  # h,w
        all_anchors_fpn = get_all_anchors_fpn(
            strides=cfg.FPN.ANCHOR_STRIDES,
            sizes=cfg.RPN.ANCHOR_SIZES,
            ratios=cfg.RPN.ANCHOR_RATIOS,
            max_size=cfg.PREPROC.MAX_SIZE)

        # Input keys are numbered by pyramid level, and the first level is 2.
        multilevel_anchors = [
            RPNAnchors(level_anchors,
                       inputs['anchor_labels_lvl{}'.format(level)],
                       inputs['anchor_boxes_lvl{}'.format(level)])
            for level, level_anchors in enumerate(all_anchors_fpn, start=2)
        ]
        self.slice_feature_and_anchors(features, multilevel_anchors)

        # Multi-Level RPN Proposals
        multilevel_label_logits = []
        multilevel_box_logits = []
        for level_feature in features:
            label_logits, box_logits = rpn_head(
                'rpn', level_feature, cfg.FPN.NUM_CHANNEL,
                len(cfg.RPN.ANCHOR_RATIOS))
            multilevel_label_logits.append(label_logits)
            multilevel_box_logits.append(box_logits)

        multilevel_pred_boxes = [
            level_anchors.decode_logits(level_box_logits)
            for level_anchors, level_box_logits in zip(
                multilevel_anchors, multilevel_box_logits)
        ]

        proposal_boxes, _proposal_scores = generate_fpn_proposals(
            multilevel_pred_boxes, multilevel_label_logits, image_shape2d)

        losses = []
        if self.training:
            losses = multilevel_rpn_losses(
                multilevel_anchors, multilevel_label_logits,
                multilevel_box_logits)

        return BoxProposals(proposal_boxes), losses
def sample(img, coords):
    """
    Gather the pixels of ``img`` located at ``coords``.

    Args:
        img: bxhxwxc
        coords: bxh2xw2x2. each coordinate is (y, x) integer.
            Out of boundary coordinates will be clipped.
            h2 and w2 must be statically known.
    Return:
        bxh2xw2xc image
    """
    static_hwc = img.get_shape().as_list()[1:]   # h, w, c
    batch_size = tf.shape(img)[0]
    out_hw = coords.get_shape().as_list()[1:3]  # h2, w2
    assert None not in out_hw, coords.get_shape()

    # Largest valid (y, x) index. The bound is float32 because the
    # coordinates are clipped first and only then cast to int32.
    upper_bound = tf.constant(
        [static_hwc[0] - 1, static_hwc[1] - 1], dtype=tf.float32)
    clipped = tf.clip_by_value(coords, 0., upper_bound)  # borderMode==repeat
    yx = tf.cast(clipped, tf.int32)

    # Prefix every (y, x) pair with the index of the batch element it
    # belongs to, giving full (b, y, x) indices for gather_nd.
    batch_ids = tf.reshape(
        tf.range(batch_size, dtype=tf.int32), [-1, 1, 1, 1])
    batch_ids = tf.tile(batch_ids, [1, out_hw[0], out_hw[1], 1])    # bxh2xw2x1
    gather_indices = tf.concat([batch_ids, yx], axis=3)  # bxh2xw2x3
    return tf.gather_nd(img, gather_indices)
Beispiel #6
0
def GroupNorm(x, group, gamma_initializer=tf.constant_initializer(1.)):
    """
    Group Normalization over a 4D tensor whose axis 1 holds the channels.

    https://arxiv.org/abs/1803.08494
    More code that reproduces the paper can be found at https://github.com/ppwwyyxx/GroupNorm-reproduce/.

    Args:
        x: 4D tensor. Axis 1 is read from the static shape as the channel
            count, and axes 2 and 3 are read dynamically as h and w.
        group: number of groups; must divide the channel count.
        gamma_initializer: initializer for the ``gamma`` variable.

    Returns:
        The normalized tensor, reshaped back to the dynamic shape of ``x``.
    """
    static_shape = x.get_shape().as_list()
    assert len(static_shape) == 4, static_shape
    chan = static_shape[1]
    assert chan % group == 0, chan
    group_size = chan // group

    orig_shape = tf.shape(x)
    h, w = orig_shape[2], orig_shape[3]

    # Split channels into (group, group_size) so that the statistics are
    # taken per sample and per group over (group_size, h, w).
    grouped = tf.reshape(x, tf.stack([-1, group, group_size, h, w]))
    mean, var = tf.nn.moments(grouped, [2, 3, 4], keep_dims=True)

    # Per-channel parameters, reshaped so they broadcast over the grouped
    # layout.
    param_shape = [1, group, group_size, 1, 1]
    beta = tf.reshape(
        tf.get_variable('beta', [chan],
                        initializer=tf.constant_initializer()),
        param_shape)
    gamma = tf.reshape(
        tf.get_variable('gamma', [chan], initializer=gamma_initializer),
        param_shape)

    normalized = tf.nn.batch_normalization(
        grouped, mean, var,
        offset=beta, scale=gamma, variance_epsilon=1e-5,
        name='output')
    return tf.reshape(normalized, orig_shape, name='output')
Beispiel #7
0
    def roi_heads(self, image, features, proposals, targets):
        """Build the FPN box head (plain or cascade) and the optional mask head.

        Args:
            image: input image tensor; ``tf.shape(image)[2:]`` is used as the
                2D image shape (h, w).
            features: the five FPN feature maps (asserted); only the first
                four are passed to RoI align.
            proposals: RPN proposals exposing ``.boxes``. In training they
                are replaced by the output of ``sample_fast_rcnn_targets``.
            targets: sequence starting with ``gt_boxes, gt_labels``.
                ``targets[2]`` is read as the ground-truth masks in training
                when ``cfg.MODE_MASK`` is set.

        Returns:
            In training, the list returned by the box head's ``losses()``,
            with the mask loss appended when ``cfg.MODE_MASK`` is set.
            In inference, an empty list; predictions are only created as
            named graph ops (``fastrcnn_all_boxes``, ``fastrcnn_all_scores``,
            the ``output`` name scope and ``output/masks``).
        """
        image_shape2d = tf.shape(image)[2:]  # h,w
        assert len(features) == 5, "Features have to be P23456!"
        gt_boxes, gt_labels, *_ = targets
        training = self.training

        if training:
            proposals = sample_fast_rcnn_targets(
                proposals.boxes, gt_boxes, gt_labels)

        fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
        # Only the first four levels take part in RoI align.
        roi_levels = features[:4]

        if cfg.FPN.CASCADE:

            def roi_func(boxes):
                return multilevel_roi_align(roi_levels, boxes, 7)

            fastrcnn_head = CascadeRCNNHead(
                proposals, roi_func, fastrcnn_head_func,
                (gt_boxes, gt_labels), image_shape2d, cfg.DATA.NUM_CATEGORY)
        else:
            box_roi_feature = multilevel_roi_align(
                roi_levels, proposals.boxes, 7)
            head_feature = fastrcnn_head_func('fastrcnn', box_roi_feature)
            label_logits, box_logits = fastrcnn_outputs(
                'fastrcnn/outputs', head_feature, cfg.DATA.NUM_CATEGORY)
            reg_weights = tf.constant(
                cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
            fastrcnn_head = FastRCNNHead(
                proposals, box_logits, label_logits, gt_boxes, reg_weights)

        if training:
            all_losses = fastrcnn_head.losses()
            if not cfg.MODE_MASK:
                return all_losses

            gt_masks = targets[2]
            # maskrcnn loss
            mask_roi_feature = multilevel_roi_align(
                roi_levels, proposals.fg_boxes(), 14,
                name_scope='multilevel_roi_align_mask')
            maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', mask_roi_feature,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28

            fg_mask_targets = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt,
                28,
                pad_border=False)  # fg x 1x28x28
            fg_mask_targets = tf.squeeze(
                fg_mask_targets, axis=1, name='sampled_fg_mask_targets')
            mask_loss = maskrcnn_loss(
                mask_logits, proposals.fg_labels(), fg_mask_targets)
            all_losses.append(mask_loss)
            return all_losses

        # Inference path.
        decoded_boxes = clip_boxes(
            fastrcnn_head.decoded_output_boxes(), image_shape2d,
            name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
        final_boxes, _final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')

        if cfg.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            mask_roi_feature = multilevel_roi_align(
                roi_levels, final_boxes, 14)
            maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', mask_roi_feature,
                cfg.DATA.NUM_CATEGORY)  # #result x #cat x 28 x 28

            # For each detection, pick the mask channel at index (label - 1).
            row_ids = tf.range(tf.size(final_labels))
            class_ids = tf.cast(final_labels, tf.int32) - 1
            indices = tf.stack([row_ids, class_ids], axis=1)
            final_mask_logits = tf.gather_nd(
                mask_logits, indices)  # #resultx28x28
            # Created only for its name; the Python value is not used.
            tf.sigmoid(final_mask_logits, name='output/masks')
        return []
Beispiel #8
0
    def roi_heads(self, image, features, proposals, targets):
        """Build the single-feature-map box head and the optional mask head.

        Args:
            image: input image tensor; ``tf.shape(image)[2:]`` is used as the
                2D image shape (h, w).
            features: sequence of backbone feature maps; only ``features[0]``
                is used.
            proposals: RPN proposals exposing ``.boxes``. In training they
                are replaced by the output of ``sample_fast_rcnn_targets``.
            targets: sequence starting with ``gt_boxes, gt_labels``.
                ``targets[2]`` is read as the ground-truth masks in training
                when ``cfg.MODE_MASK`` is set.

        Returns:
            In training, the list returned by the box head's ``losses()``,
            with the mask loss appended when ``cfg.MODE_MASK`` is set.
            In inference, an empty list; predictions are only created as
            named graph ops (``fastrcnn_all_boxes``, ``fastrcnn_all_scores``,
            the ``output`` name scope and ``output/masks``).
        """
        image_shape2d = tf.shape(image)[2:]  # h,w
        featuremap = features[0]
        gt_boxes, gt_labels, *_ = targets
        training = self.training

        if training:
            # sample proposal boxes in training
            proposals = sample_fast_rcnn_targets(
                proposals.boxes, gt_boxes, gt_labels)
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference

        # Boxes are scaled by 1 / ANCHOR_STRIDE before RoI align on the
        # feature map.
        inv_stride = 1.0 / cfg.RPN.ANCHOR_STRIDE
        boxes_on_featuremap = proposals.boxes * inv_stride
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        last_stage_blocks = cfg.BACKBONE.RESNET_NUM_BLOCKS[-1]
        feature_fastrcnn = resnet_conv5(
            roi_resized, last_stage_blocks)  # nxcx7x7
        # Keep C5 feature to be shared with mask branch
        feature_gap = GlobalAvgPooling(
            'gap', feature_fastrcnn, data_format='channels_first')
        label_logits, box_logits = fastrcnn_outputs(
            'fastrcnn', feature_gap, cfg.DATA.NUM_CATEGORY)

        reg_weights = tf.constant(
            cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
        fastrcnn_head = FastRCNNHead(
            proposals, box_logits, label_logits, gt_boxes, reg_weights)

        if training:
            all_losses = fastrcnn_head.losses()
            if not cfg.MODE_MASK:
                return all_losses

            gt_masks = targets[2]
            # maskrcnn loss
            # In training, mask branch shares the same C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds())
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY,
                num_convs=0)  # #fg x #cat x 14x14

            fg_mask_targets = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt,
                14,
                pad_border=False)  # nfg x 1x14x14
            fg_mask_targets = tf.squeeze(
                fg_mask_targets, axis=1, name='sampled_fg_mask_targets')
            mask_loss = maskrcnn_loss(
                mask_logits, proposals.fg_labels(), fg_mask_targets)
            all_losses.append(mask_loss)
            return all_losses

        # Inference path.
        decoded_boxes = clip_boxes(
            fastrcnn_head.decoded_output_boxes(), image_shape2d,
            name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
        final_boxes, _final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')

        if cfg.MODE_MASK:
            # The mask branch is re-run on the final detections here, not on
            # the proposals.
            roi_resized = roi_align(featuremap, final_boxes * inv_stride, 14)
            feature_maskrcnn = resnet_conv5(roi_resized, last_stage_blocks)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY,
                0)  # #result x #cat x 14x14

            # For each detection, pick the mask channel at index (label - 1).
            row_ids = tf.range(tf.size(final_labels))
            class_ids = tf.cast(final_labels, tf.int32) - 1
            indices = tf.stack([row_ids, class_ids], axis=1)
            final_mask_logits = tf.gather_nd(
                mask_logits, indices)  # #resultx14x14
            # Created only for its name; the Python value is not used.
            tf.sigmoid(final_mask_logits, name='output/masks')
        return []