Esempio n. 1
0
def multilevel_roi_align(features, rcnn_boxes, resolution):
    """
    Run RoIAlign over a 4-level feature pyramid and return the crops in
    the same order as the input boxes.

    Args:
        features ([tf.Tensor]): the 4 FPN feature maps (levels 2-5)
        rcnn_boxes (tf.Tensor): nx4 boxes
        resolution (int): output spatial resolution
    Returns:
        NxC x res x res
    """
    assert len(features) == 4, features
    # Distribute the boxes over the pyramid levels. `level_ids` is treated
    # below as the original position of every box, grouped per level.
    level_ids, level_boxes = fpn_map_rois_to_levels(rcnn_boxes)

    # Crop each group of boxes from the feature map of its own level.
    per_level_crops = []
    for idx, (lvl_boxes, lvl_feature) in enumerate(zip(level_boxes, features)):
        with tf.name_scope('roi_level{}'.format(idx + 2)):
            # Bring image-space boxes into this level's feature-map space.
            scale = 1.0 / cfg.FPN.ANCHOR_STRIDES[idx]
            per_level_crops.append(
                roi_align(lvl_feature, lvl_boxes * scale, resolution))

    # this can fail if using TF<=1.8 with MKL build
    stacked = tf.concat(per_level_crops, axis=0)  # NCHW
    # The crops are grouped by level; undo that grouping so that row k of
    # the result belongs to input box k. tf.invert_permutation expects the
    # concatenated ids to be a permutation of 0..N-1.
    permutation = tf.concat(level_ids, axis=0)
    restore_order = tf.invert_permutation(permutation)
    return tf.gather(stacked, restore_order)
Esempio n. 2
0
    def build_graph(self, *inputs):
        """
        Build the graph that predicts a mask and attributes for the
        ground-truth boxes of an image.

        Args:
            *inputs: tensors matched positionally against
                ``self.input_names``; the entries 'image', 'gt_boxes' and
                'male' are the ones read here.
        """
        tensors = dict(zip(self.input_names, inputs))
        image = self.preprocess(tensors['image'])  # 1CHW

        # C4 backbone features
        c4_features = resnet_c4_backbone(
            image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])

        # gt_boxes are read as (x, y, w, h) columns and converted to
        # corner form (x1, y1, x2, y2) before cropping.
        left, top, width, height = tf.split(tensors['gt_boxes'], 4, axis=1)
        corner_boxes = tf.concat(
            [left, top, left + width, top + height], axis=1)
        stride_scale = 1.0 / cfg.RPN.ANCHOR_STRIDE
        rois = roi_align(c4_features, corner_boxes * stride_scale, 14)  # 14x14 per roi

        # Every box gets label 1, so the mask of category index 0 is the
        # one selected for each RoI below.
        ones_labels = tf.ones_like(tensors['male'])
        mask_features = resnet_conv5(rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        mask_logits = maskrcnn_upXconv_head(
            'maskrcnn', mask_features, cfg.DATA.NUM_CATEGORY, 0)  # #result x #cat x 14x14
        row_ids = tf.range(tf.size(ones_labels))
        class_ids = tf.to_int32(ones_labels) - 1
        gather_idx = tf.stack([row_ids, class_ids], axis=1)
        mask_probs = tf.sigmoid(
            tf.gather_nd(mask_logits, gather_idx),
            name='output/masks')  # #resultx14x14

        # Broadcast the per-RoI mask over 1024 channels.
        tiled_probs = tf.tile(
            tf.expand_dims(mask_probs, axis=1), multiples=[1, 1024, 1, 1])
        # NOTE(review): both branches of this select hold the same values
        # (rois vs. rois * 1.0), so the mask currently does not change the
        # features fed to the attribute branch -- confirm this is intended.
        masked_rois = tf.where(tiled_probs >= 0.5, rois, rois * 1.0)

        attr_features = resnet_conv5_attr(
            masked_rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        pooled = GlobalAvgPooling(
            'gap', attr_features, data_format='channels_first')
        attrs_predict(pooled)
Esempio n. 3
0
    def build_graph(self, *inputs):
        """
        Build the training graph for the attribute branch.

        Besides the attribute branch, the method also constructs RPN,
        Fast R-CNN and mask heads; of those, only the attribute loss and
        the weight-decay cost enter the returned value.

        Args:
            *inputs: tensors matched positionally against
                ``self.input_names``. The entries 'image', 'anchor_labels',
                'anchor_boxes', 'gt_boxes' and 'male' are read directly;
                the whole dict is also handed to ``all_attrs_losses``.

        Returns:
            The 'total_cost' tensor: ``tf.add_n`` over the attribute loss
            and the 'wd_cost' regularization term.
        """
        inputs = dict(zip(self.input_names, inputs))
        image = self.preprocess(inputs['image'])  # 1CHW
        # build resnet c4
        featuremap = resnet_c4_backbone(image,
                                        cfg.BACKBONE.RESNET_NUM_BLOCK[:3])

        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap,
                                                    cfg.RPN.HEAD_DIM,
                                                    cfg.RPN.NUM_ANCHOR)
        # HEAD_DIM = 1024, NUM_ANCHOR = 15
        # rpn_label_logits: fHxfWxNA
        # rpn_box_logits: fHxfWxNAx4
        anchors = RPNAnchors(get_all_anchors(), inputs['anchor_labels'],
                             inputs['anchor_boxes'])
        # anchor_boxes is Groundtruth boxes corresponding to each anchor
        # presumably crops the anchors to the feature map's spatial extent;
        # verify against RPNAnchors.narrow_to
        anchors = anchors.narrow_to(featuremap)
        image_shape2d = tf.shape(image)[2:]  # h,w
        pred_boxes_decoded = anchors.decode_logits(
            rpn_box_logits)  # fHxfWxNAx4, floatbox

        # ProposalCreator (get the topk proposals)
        # The TEST_* top-k settings are used although this method returns a
        # training cost. proposal_scores is not used afterwards.
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(pred_boxes_decoded, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]),
            image_shape2d,
            cfg.RPN.TEST_PRE_NMS_TOPK,  # 2000
            cfg.RPN.TEST_POST_NMS_TOPK)  # 1000
        # gt_boxes are read as (x, y, w, h) columns and converted to corner
        # form (x1, y1, x2, y2); the RoIs below are cropped at these
        # ground-truth boxes, not at the RPN proposals.
        x, y, w, h = tf.split(inputs['gt_boxes'], 4, axis=1)
        gt_boxes = tf.concat([x, y, x + w, y + h], axis=1)
        boxes_on_featuremap = gt_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE
                                          )  # ANCHOR_STRIDE = 16
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        feature_fastrcnn = resnet_conv5(
            roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1]
        )  # nxcx7x7 # RESNET_NUM_BLOCK = [3, 4, 6, 3]
        feature_gap = GlobalAvgPooling('gap',
                                       feature_fastrcnn,
                                       data_format='channels_first')

        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)
        # Returns:
        # cls_logits: Tensor("fastrcnn/class/output:0", shape=(n, 81), dtype=float32)
        # reg_logits: Tensor("fastrcnn/output_box:0", shape=(n, 81, 4), dtype=float32)

        # ------------------Fastrcnn_Head------------------------
        # NOTE(review): the logits above are computed from RoIs cropped at
        # gt_boxes, while the boxes passed here are the RPN proposals, and
        # they are passed as a raw tensor rather than a proposals wrapper.
        # Confirm that the two agree in their first dimension and that
        # FastRCNNHead accepts a plain tensor.
        fastrcnn_head = FastRCNNHead(
            proposal_boxes,
            fastrcnn_box_logits,
            fastrcnn_label_logits,  #
            tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS,
                        dtype=tf.float32))  # [10., 10., 5., 5.]

        decoded_boxes = fastrcnn_head.decoded_output_boxes(
        )  # pre_boxes_on_images
        decoded_boxes = clip_boxes(decoded_boxes,
                                   image_shape2d,
                                   name='fastrcnn_all_boxes')

        label_scores = tf.nn.softmax(fastrcnn_label_logits,
                                     name='fastrcnn_all_scores')
        # class scores, summed to one for each box.

        # final_boxes / final_scores / final_labels are not referenced again
        # in this method; the call is kept for the ops it creates under the
        # 'output' name scope.
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')

        # NOTE(review): resnet_conv5 is invoked a second time on the same
        # roi_resized instead of reusing feature_fastrcnn -- confirm whether
        # a separate (or shared) set of conv5 weights is intended.
        feature_maskrcnn = resnet_conv5(
            roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1]
        )  # nxcx7x7 # RESNET_NUM_BLOCK = [3, 4, 6, 3]
        mask_logits = maskrcnn_upXconv_head('maskrcnn', feature_maskrcnn,
                                            cfg.DATA.NUM_CATEGORY,
                                            0)  # #result x #cat x 14x14
        # Assume only person here: every RoI gets label 1, so category
        # index 0 is selected for each row of mask_logits.
        person_labels = tf.ones_like(inputs['male'])
        indices = tf.stack(
            [tf.range(tf.size(person_labels)),
             tf.to_int32(person_labels) - 1],
            axis=1)
        final_mask_logits = tf.gather_nd(mask_logits, indices)  # #resultx14x14
        final_mask_logits = tf.sigmoid(final_mask_logits, name='output/masks')
        # Hard-coded switch: with mask = False the masked branch below is
        # never built and the attribute head sees the plain RoI features.
        mask = False
        if mask:
            final_mask_logits_expand = tf.expand_dims(final_mask_logits,
                                                      axis=1)
            final_mask_logits_tile = tf.tile(final_mask_logits_expand,
                                             multiples=[1, 1024, 1, 1])
            # NOTE(review): both branches of this select hold the same
            # values (roi_resized vs. roi_resized * 1.0), so even when
            # enabled the mask would not change the features -- confirm.
            fg_roi_resized = tf.where(final_mask_logits_tile >= 0.5,
                                      roi_resized, roi_resized * 1.0)
            feature_attrs = resnet_conv5_attr(
                fg_roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        else:
            feature_attrs = resnet_conv5_attr(
                roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])

        feature_attrs_gap = GlobalAvgPooling('gap',
                                             feature_attrs,
                                             data_format='channels_first')

        attrs_logits = attrs_head('attrs', feature_attrs_gap)
        attrs_loss = all_attrs_losses(inputs, attrs_logits, attr_losses_v2)

        all_losses = [attrs_loss]
        # L2 regularization cost, requested for the pattern '.*/W'
        wd_cost = regularize_cost('.*/W',
                                  l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                                  name='wd_cost')
        all_losses.append(wd_cost)
        total_cost = tf.add_n(all_losses, 'total_cost')

        add_moving_summary(wd_cost, total_cost)
        return total_cost
Esempio n. 4
0
    def build_graph(self, *inputs):
        """
        Build the C4 detection graph (RPN + Fast R-CNN head, plus the mask
        head when ``cfg.MODE_MASK`` is set) for training or inference.

        Args:
            *inputs: tensors matched positionally against
                ``self.input_names``. 'image', 'anchor_labels',
                'anchor_boxes', 'gt_boxes' and 'gt_labels' are always
                read; 'gt_masks' is read when training with masks.

        Returns:
            The 'total_cost' tensor when the current tower is training.
            In inference nothing is returned; results are exposed through
            the named ops 'fastrcnn_all_boxes', 'fastrcnn_all_scores',
            the 'output' name scope and, with masks, 'output/masks'.
        """
        tensors = dict(zip(self.input_names, inputs))
        training = get_current_tower_context().is_training
        image = self.preprocess(tensors['image'])  # 1CHW

        c4 = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head(
            'rpn', c4, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

        anchors = RPNAnchors(
            get_all_anchors(), tensors['anchor_labels'],
            tensors['anchor_boxes'])
        anchors = anchors.narrow_to(c4)

        image_hw = tf.shape(image)[2:]  # h,w
        rpn_boxes = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox

        # Proposal budget differs between training and inference.
        if training:
            pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
        else:
            pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
            post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK
        proposal_boxes, _ = generate_rpn_proposals(
            tf.reshape(rpn_boxes, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]),
            image_hw, pre_nms_topk, post_nms_topk)

        # Both ground-truth entries are looked up in either mode.
        gt_boxes, gt_labels = tensors['gt_boxes'], tensors['gt_labels']
        # Training samples proposals against the ground truth; inference
        # crops RoIs at every proposal.
        proposals = (
            sample_fast_rcnn_targets(proposal_boxes, gt_boxes, gt_labels)
            if training else BoxProposals(proposal_boxes))

        rois = roi_align(
            c4, proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)

        # Keep C5 feature to be shared with mask branch
        c5 = resnet_conv5(rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
        pooled = GlobalAvgPooling('gap', c5, data_format='channels_first')
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', pooled, cfg.DATA.NUM_CLASS)

        fastrcnn_head = FastRCNNHead(
            proposals, fastrcnn_box_logits, fastrcnn_label_logits,
            tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))

        if not training:
            decoded = clip_boxes(
                fastrcnn_head.decoded_output_boxes(), image_hw,
                name='fastrcnn_all_boxes')
            scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
            final_boxes, _, final_labels = fastrcnn_predictions(
                decoded, scores, name_scope='output')

            if cfg.MODE_MASK:
                # In inference the mask head runs on the final detections,
                # so conv5 is applied again on freshly cropped RoIs.
                mask_rois = roi_align(
                    c4, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
                mask_features = resnet_conv5(
                    mask_rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', mask_features, cfg.DATA.NUM_CATEGORY,
                    0)  # #result x #cat x 14x14
                # Pick, per detection, the mask of its predicted label
                # (labels are shifted by one to index the category axis).
                box_ids = tf.range(tf.size(final_labels))
                category_ids = tf.to_int32(final_labels) - 1
                picked = tf.gather_nd(
                    mask_logits,
                    tf.stack([box_ids, category_ids], axis=1))  # #resultx14x14
                tf.sigmoid(picked, name='output/masks')
            return None

        # rpn loss, then fastrcnn loss
        losses = list(
            rpn_losses(anchors.gt_labels, anchors.encoded_gt_boxes(),
                       rpn_label_logits, rpn_box_logits))
        losses.extend(fastrcnn_head.losses())

        if cfg.MODE_MASK:
            # maskrcnn loss
            # In training, mask branch shares the same C5 feature.
            fg_c5 = tf.gather(c5, proposals.fg_inds())
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_c5, cfg.DATA.NUM_CATEGORY,
                num_convs=0)  # #fg x #cat x 14x14

            fg_mask_targets = crop_and_resize(
                tf.expand_dims(tensors['gt_masks'], 1),
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt,
                14,
                pad_border=False)  # nfg x 1x14x14
            fg_mask_targets = tf.squeeze(
                fg_mask_targets, 1, 'sampled_fg_mask_targets')
            losses.append(
                maskrcnn_loss(mask_logits, proposals.fg_labels(),
                              fg_mask_targets))

        wd_cost = regularize_cost(
            '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')
        losses.append(wd_cost)

        total_cost = tf.add_n(losses, 'total_cost')
        add_moving_summary(total_cost, wd_cost)
        return total_cost
Esempio n. 5
0
    def build_graph(self, *inputs):
        """
        Build the detection graph: C4 backbone, RPN proposals, RoIAlign and
        the Fast R-CNN head with its post-processing.

        The RPN always uses the TEST_* top-k settings here.

        Args:
            *inputs: tensors matched positionally against
                ``self.input_names``; 'image', 'anchor_labels' and
                'anchor_boxes' are the entries read here.

        Nothing is returned; results are exposed through the named ops
        'fastrcnn_all_boxes', 'fastrcnn_all_scores' and the 'output'
        name scope.
        """
        tensors = dict(zip(self.input_names, inputs))
        image = self.preprocess(tensors['image'])  # 1CHW

        # C4 backbone features
        c4 = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])

        # RPN head
        # rpn_label_logits: fHxfWxNA
        # rpn_box_logits: fHxfWxNAx4
        rpn_label_logits, rpn_box_logits = rpn_head(
            'rpn', c4, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)
        # anchor_boxes is Groundtruth boxes corresponding to each anchor
        anchors = RPNAnchors(
            get_all_anchors(), tensors['anchor_labels'],
            tensors['anchor_boxes'])
        anchors = anchors.narrow_to(c4)
        image_hw = tf.shape(image)[2:]  # h,w
        rpn_boxes = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox

        # ProposalCreator (get the topk proposals)
        flat_boxes = tf.reshape(rpn_boxes, [-1, 4])
        flat_scores = tf.reshape(rpn_label_logits, [-1])
        proposal_boxes, _ = generate_rpn_proposals(
            flat_boxes, flat_scores, image_hw,
            cfg.RPN.TEST_PRE_NMS_TOPK, cfg.RPN.TEST_POST_NMS_TOPK)

        # Bring image-space proposals into feature-map space and crop a
        # 14x14 patch for each of them.
        stride_scale = 1.0 / cfg.RPN.ANCHOR_STRIDE
        rois = roi_align(c4, proposal_boxes * stride_scale, 14)

        c5 = resnet_conv5(rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
        pooled = GlobalAvgPooling('gap', c5, data_format='channels_first')

        label_logits, box_logits = fastrcnn_outputs(
            'fastrcnn', pooled, cfg.DATA.NUM_CLASS)

        # ------------------Fastrcnn_Head------------------------
        head = FastRCNNHead(
            BoxProposals(proposal_boxes), box_logits, label_logits,
            tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))

        # Decode the regression output into image-space boxes and clip
        # them to the image.
        boxes_on_image = clip_boxes(
            head.decoded_output_boxes(), image_hw, name='fastrcnn_all_boxes')

        # class scores, summed to one for each box.
        class_scores = tf.nn.softmax(label_logits, name='fastrcnn_all_scores')

        # Called for the ops it creates under the 'output' name scope; the
        # returned tensors are not used further in this method.
        fastrcnn_predictions(boxes_on_image, class_scores, name_scope='output')
Esempio n. 6
0
    def build_graph(self, *inputs):
        """
        Build the attribute-prediction graph on ground-truth boxes.

        Args:
            *inputs: tensors matched positionally against
                ``self.input_names``; 'image' and 'gt_boxes' are the
                entries read here.

        Nothing is returned; the graph ends with the ``attrs_predict``
        call on the pooled attribute features.
        """
        tensors = dict(zip(self.input_names, inputs))
        image = self.preprocess(tensors['image'])  # 1CHW

        # C4 backbone features
        c4 = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])

        # The RPN / Fast R-CNN stage that used to supply person boxes is
        # disabled in this variant; attributes are predicted directly on
        # the ground-truth boxes instead.

        # Attributes branch: gt_boxes are read as (x, y, w, h) columns and
        # converted to corner form (x1, y1, x2, y2).
        left, top, width, height = tf.split(tensors['gt_boxes'], 4, axis=1)
        corner_boxes = tf.concat(
            [left, top, left + width, top + height], axis=1)
        person_rois = roi_align(
            c4, corner_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
        attr_features = resnet_conv5_attr(
            person_rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        pooled = GlobalAvgPooling(
            'gap', attr_features, data_format='channels_first')
        attrs_predict(pooled)