Ejemplos de clip_boxes en Python

Lenguaje de programación: Python

Namespace/Package Name: model

Método / Función: clip_boxes

Ejemplos en hotexamples.com: 7

Python clip_boxes - 7 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de model.clip_boxes extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Ejemplo n.º 1

Mostrar archivo

    def fastrcnn_inference(self, image_shape2d, rcnn_boxes, rcnn_label_logits,
                           rcnn_box_logits):
        """
        Args:
            image_shape2d: h, w
            rcnn_boxes (nx4): the proposal boxes
            rcnn_label_logits (n):
            rcnn_box_logits (nx4):

        Returns:
            boxes (mx4):
            labels (m): each >= 1
        """
        label_probs = tf.nn.softmax(
            rcnn_label_logits, name='fastrcnn_all_probs')  # #proposal x #Class
        anchors = tf.tile(tf.expand_dims(rcnn_boxes, 1),
                          [1, config.NUM_CLASS - 1, 1])  # #proposal x #Cat x 4
        decoded_boxes = decode_bbox_target(
            rcnn_box_logits / tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS),
            anchors)
        decoded_boxes = clip_boxes(decoded_boxes,
                                   image_shape2d,
                                   name='fastrcnn_all_boxes')

        # indices: Nx2. Each index into (#proposal, #category)
        pred_indices, final_probs = fastrcnn_predictions(
            decoded_boxes, label_probs)
        final_probs = tf.identity(final_probs, 'final_probs')
        final_boxes = tf.gather_nd(decoded_boxes,
                                   pred_indices,
                                   name='final_boxes')
        final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')
        return final_boxes, final_labels

Ejemplo n.º 2

Mostrar archivo

Archivo: train.py Proyecto: wu-yy/tensorpack

    def fastrcnn_inference(self, image_shape2d,
                           rcnn_boxes, rcnn_label_logits, rcnn_box_logits):
        """
        Args:
            image_shape2d: h, w
            rcnn_boxes (nx4): the proposal boxes
            rcnn_label_logits (n):
            rcnn_box_logits (nx4):

        Returns:
            boxes (mx4):
            labels (m): each >= 1
        """
        label_probs = tf.nn.softmax(rcnn_label_logits, name='fastrcnn_all_probs')  # #proposal x #Class
        anchors = tf.tile(tf.expand_dims(rcnn_boxes, 1), [1, config.NUM_CLASS - 1, 1])   # #proposal x #Cat x 4
        decoded_boxes = decode_bbox_target(
            rcnn_box_logits /
            tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS), anchors)
        decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')

        # indices: Nx2. Each index into (#proposal, #category)
        pred_indices, final_probs = fastrcnn_predictions(decoded_boxes, label_probs)
        final_probs = tf.identity(final_probs, 'final_probs')
        final_boxes = tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes')
        final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')
        return final_boxes, final_labels

Ejemplo n.º 3

Mostrar archivo

    def _build_graph(self, inputs):
        is_training = get_current_tower_context().is_training
        if config.MODE_MASK:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
        else:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        fm_anchors = self._get_anchors(image)
        image = self._preprocess(image)  # 1CHW
        image_shape2d = tf.shape(image)[2:]

        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)
        featuremap = pretrained_resnet_conv4(image,
                                             config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024,
                                                    config.NUM_ANCHOR)

        decoded_boxes = decode_bbox_target(rpn_box_logits,
                                           fm_anchors)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(decoded_boxes, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]), image_shape2d)

        if is_training:
            # sample proposal boxes in training
            rcnn_sampled_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
            boxes_on_featuremap = rcnn_sampled_boxes * (1.0 /
                                                        config.ANCHOR_STRIDE)
        else:
            # use all proposal boxes in inference
            boxes_on_featuremap = proposal_boxes * (1.0 / config.ANCHOR_STRIDE)

        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        def ff_true():
            feature_fastrcnn = resnet_conv5(
                roi_resized, config.RESNET_NUM_BLOCK[-1])  # nxcx7x7
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(
                'fastrcnn', feature_fastrcnn, config.NUM_CLASS)
            return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

        def ff_false():
            ncls = config.NUM_CLASS
            return tf.zeros([0, 2048, 7,
                             7]), tf.zeros([0,
                                            ncls]), tf.zeros([0, ncls - 1, 4])

        feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
            tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(anchor_labels,
                                                      anchor_boxes_encoded,
                                                      rpn_label_logits,
                                                      rpn_box_logits)

            # fastrcnn loss
            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0),
                                            [-1])  # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_sampled_boxes,
                                         fg_inds_wrt_sample)

            with tf.name_scope('fg_sample_patch_viz'):
                fg_sampled_patches = crop_and_resize(
                    image, fg_sampled_boxes,
                    tf.zeros_like(fg_inds_wrt_sample, dtype=tf.int32), 300)
                fg_sampled_patches = tf.transpose(fg_sampled_patches,
                                                  [0, 2, 3, 1])
                fg_sampled_patches = tf.reverse(fg_sampled_patches,
                                                axis=[-1])  # BGR->RGB
                tf.summary.image('viz', fg_sampled_patches, max_outputs=30)

            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)
            encoded_boxes = encode_bbox_target(
                matched_gt_boxes, fg_sampled_boxes) * tf.constant(
                    config.FASTRCNN_BBOX_REG_WEIGHTS)
            fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
                rcnn_labels, fastrcnn_label_logits, encoded_boxes,
                tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample))

            if config.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_head(
                    'maskrcnn', fg_feature,
                    config.NUM_CLASS)  # #fg x #cat x 14x14

                gt_masks_for_fg = tf.gather(gt_masks,
                                            fg_inds_wrt_gt)  # nfg x H x W
                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks_for_fg, 1), fg_sampled_boxes,
                    tf.range(tf.size(fg_inds_wrt_gt)), 14)  # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                                 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels,
                                           target_masks_for_fg)
            else:
                mrcnn_loss = 0.0

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(1e-4),
                name='wd_cost')

            self.cost = tf.add_n([
                rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
                fastrcnn_box_loss, mrcnn_loss, wd_cost
            ], 'total_cost')

            add_moving_summary(self.cost, wd_cost)
        else:
            label_probs = tf.nn.softmax(
                fastrcnn_label_logits,
                name='fastrcnn_all_probs')  # #proposal x #Class
            anchors = tf.tile(
                tf.expand_dims(proposal_boxes, 1),
                [1, config.NUM_CLASS - 1, 1])  # #proposal x #Cat x 4
            decoded_boxes = decode_bbox_target(
                fastrcnn_box_logits /
                tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS), anchors)
            decoded_boxes = clip_boxes(decoded_boxes,
                                       image_shape2d,
                                       name='fastrcnn_all_boxes')

            # indices: Nx2. Each index into (#proposal, #category)
            pred_indices, final_probs = fastrcnn_predictions(
                decoded_boxes, label_probs)
            final_probs = tf.identity(final_probs, 'final_probs')
            final_boxes = tf.gather_nd(decoded_boxes,
                                       pred_indices,
                                       name='final_boxes')
            final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')

            if config.MODE_MASK:
                # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
                def f1():
                    roi_resized = roi_align(
                        featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE),
                        14)
                    feature_maskrcnn = resnet_conv5(
                        roi_resized, config.RESNET_NUM_BLOCK[-1])
                    mask_logits = maskrcnn_head(
                        'maskrcnn', feature_maskrcnn,
                        config.NUM_CLASS)  # #result x #cat x 14x14
                    indices = tf.stack([
                        tf.range(tf.size(final_labels)),
                        tf.to_int32(final_labels) - 1
                    ],
                                       axis=1)
                    final_mask_logits = tf.gather_nd(mask_logits,
                                                     indices)  # #resultx14x14
                    return tf.sigmoid(final_mask_logits)

                final_masks = tf.cond(
                    tf.size(final_probs) > 0, f1,
                    lambda: tf.zeros([0, 14, 14]))
                tf.identity(final_masks, name='final_masks')

Ejemplo n.º 4

Mostrar archivo

Archivo: train.py Proyecto: caiwenpu/tensorpack

    def _build_graph(self, inputs):
        is_training = get_current_tower_context().is_training
        if config.MODE_MASK:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
        else:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        fm_anchors = self._get_anchors(image)
        image = self._preprocess(image)     # 1CHW
        image_shape2d = tf.shape(image)[2:]

        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)
        featuremap = pretrained_resnet_conv4(image, config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024, config.NUM_ANCHOR)

        decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(decoded_boxes, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]),
            image_shape2d)

        if is_training:
            # sample proposal boxes in training
            rcnn_sampled_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
            boxes_on_featuremap = rcnn_sampled_boxes * (1.0 / config.ANCHOR_STRIDE)
        else:
            # use all proposal boxes in inference
            boxes_on_featuremap = proposal_boxes * (1.0 / config.ANCHOR_STRIDE)

        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        def ff_true():
            feature_fastrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])    # nxcx7x7
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head('fastrcnn', feature_fastrcnn, config.NUM_CLASS)
            return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

        def ff_false():
            ncls = config.NUM_CLASS
            return tf.zeros([0, 2048, 7, 7]), tf.zeros([0, ncls]), tf.zeros([0, ncls - 1, 4])

        feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
            tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(
                anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

            # fastrcnn loss
            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_sampled_boxes, fg_inds_wrt_sample)

            with tf.name_scope('fg_sample_patch_viz'):
                fg_sampled_patches = crop_and_resize(
                    image, fg_sampled_boxes,
                    tf.zeros_like(fg_inds_wrt_sample, dtype=tf.int32), 300)
                fg_sampled_patches = tf.transpose(fg_sampled_patches, [0, 2, 3, 1])
                fg_sampled_patches = tf.reverse(fg_sampled_patches, axis=[-1])  # BGR->RGB
                tf.summary.image('viz', fg_sampled_patches, max_outputs=30)

            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)
            encoded_boxes = encode_bbox_target(
                matched_gt_boxes,
                fg_sampled_boxes) * tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
            fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
                rcnn_labels, fastrcnn_label_logits,
                encoded_boxes,
                tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample))

            if config.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_head('maskrcnn', fg_feature, config.NUM_CLASS)   # #fg x #cat x 14x14

                gt_masks_for_fg = tf.gather(gt_masks, fg_inds_wrt_gt)  # nfg x H x W
                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks_for_fg, 1),
                    fg_sampled_boxes,
                    tf.range(tf.size(fg_inds_wrt_gt)), 14)  # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
            else:
                mrcnn_loss = 0.0

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(1e-4), name='wd_cost')

            self.cost = tf.add_n([
                rpn_label_loss, rpn_box_loss,
                fastrcnn_label_loss, fastrcnn_box_loss,
                mrcnn_loss,
                wd_cost], 'total_cost')

            add_moving_summary(self.cost, wd_cost)
        else:
            label_probs = tf.nn.softmax(fastrcnn_label_logits, name='fastrcnn_all_probs')  # #proposal x #Class
            anchors = tf.tile(tf.expand_dims(proposal_boxes, 1), [1, config.NUM_CLASS - 1, 1])   # #proposal x #Cat x 4
            decoded_boxes = decode_bbox_target(
                fastrcnn_box_logits /
                tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS), anchors)
            decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')

            # indices: Nx2. Each index into (#proposal, #category)
            pred_indices, final_probs = fastrcnn_predictions(decoded_boxes, label_probs)
            final_probs = tf.identity(final_probs, 'final_probs')
            final_boxes = tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes')
            final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')

            if config.MODE_MASK:
                # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
                def f1():
                    roi_resized = roi_align(featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
                    feature_maskrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])
                    mask_logits = maskrcnn_head(
                        'maskrcnn', feature_maskrcnn, config.NUM_CLASS)   # #result x #cat x 14x14
                    indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
                    final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
                    return tf.sigmoid(final_mask_logits)

                final_masks = tf.cond(tf.size(final_probs) > 0, f1, lambda: tf.zeros([0, 14, 14]))
                tf.identity(final_masks, name='final_masks')

Ejemplo n.º 5

Mostrar archivo

    def _build_graph(self, inputs):
        is_training = get_current_tower_context().is_training
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        fm_anchors = self._get_anchors(image)
        image = self._preprocess(image)  # 1CHW
        image_shape2d = tf.shape(image)[2:]

        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)
        featuremap = pretrained_resnet_conv4(image,
                                             config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024,
                                                    config.NUM_ANCHOR)

        decoded_boxes = decode_bbox_target(rpn_box_logits,
                                           fm_anchors)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(decoded_boxes, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]), image_shape2d)

        if is_training:
            # sample proposal boxes in training
            rcnn_sampled_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
            boxes_on_featuremap = rcnn_sampled_boxes * (1.0 /
                                                        config.ANCHOR_STRIDE)
        else:
            # use all proposal boxes in inference
            boxes_on_featuremap = proposal_boxes * (1.0 / config.ANCHOR_STRIDE)

        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)
        feature_fastrcnn = resnet_conv5(roi_resized,
                                        config.RESNET_NUM_BLOCK[-1])  # nxcx7x7
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(
            'fastrcnn', feature_fastrcnn, config.NUM_CLASS)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(anchor_labels,
                                                      anchor_boxes_encoded,
                                                      rpn_label_logits,
                                                      rpn_box_logits)

            # fastrcnn loss
            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0),
                                            [-1])  # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_sampled_boxes,
                                         fg_inds_wrt_sample)

            with tf.name_scope('fg_sample_patch_viz'):
                fg_sampled_patches = crop_and_resize(
                    image, fg_sampled_boxes,
                    tf.zeros_like(fg_inds_wrt_sample, dtype=tf.int32), 300)
                fg_sampled_patches = tf.transpose(fg_sampled_patches,
                                                  [0, 2, 3, 1])
                tf.summary.image('viz', fg_sampled_patches, max_outputs=30)

            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)
            encoded_boxes = encode_bbox_target(
                matched_gt_boxes, fg_sampled_boxes) * tf.constant(
                    config.FASTRCNN_BBOX_REG_WEIGHTS)
            fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
                rcnn_labels, fastrcnn_label_logits, encoded_boxes,
                tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample))

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn)/.*W',
                l2_regularizer(1e-4),
                name='wd_cost')

            self.cost = tf.add_n([
                rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
                fastrcnn_box_loss, wd_cost
            ], 'total_cost')

            add_moving_summary(self.cost, wd_cost)
        else:
            label_probs = tf.nn.softmax(
                fastrcnn_label_logits,
                name='fastrcnn_all_probs')  # #proposal x #Class
            anchors = tf.tile(
                tf.expand_dims(proposal_boxes, 1),
                [1, config.NUM_CLASS - 1, 1])  # #proposal x #Cat x 4
            decoded_boxes = decode_bbox_target(
                fastrcnn_box_logits /
                tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS), anchors)
            decoded_boxes = clip_boxes(decoded_boxes,
                                       image_shape2d,
                                       name='fastrcnn_all_boxes')

            # indices: Nx2. Each index into (#proposal, #category)
            pred_indices, final_probs = fastrcnn_predictions(
                decoded_boxes, label_probs)
            final_probs = tf.identity(final_probs, 'final_probs')
            tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes')
            tf.add(pred_indices[:, 1], 1, name='final_labels')

Ejemplo n.º 6

Mostrar archivo

Archivo: train.py Proyecto: aljosaosep/mprcnn

    def _build_graph(self, inputs):
        is_training = get_current_tower_context().is_training

        if config.USE_SECOND_HEAD:
            # USE SECOND HEAD
            if config.MODE_MASK:
                if config.PROVIDE_BOXES_AS_INPUT:
                    image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, second_gt_labels, gt_masks, input_boxes = inputs
                else:
                    image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, second_gt_labels, gt_masks = inputs
            else:
                image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, second_gt_labels = inputs
        else:
            # NO SECOND HEAD
            if config.MODE_MASK:
                if config.PROVIDE_BOXES_AS_INPUT:
                    image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks, input_boxes = inputs
                else:
                    image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
            else:
                image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
            second_gt_labels = None

        fm_anchors = self._get_anchors(image)
        image_shape2d_before_resize = tf.shape(image)[:2]
        image = self._preprocess(image)  # 1CHW
        image_shape2d = tf.shape(image)[2:]

        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)
        featuremap = pretrained_resnet_conv4(image,
                                             config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024,
                                                    config.NUM_ANCHOR)

        decoded_boxes = decode_bbox_target(rpn_box_logits,
                                           fm_anchors)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(decoded_boxes, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]), image_shape2d)

        if config.PROVIDE_BOXES_AS_INPUT:
            old_height, old_width = image_shape2d_before_resize[
                0], image_shape2d_before_resize[1]
            new_height, new_width = image_shape2d[0], image_shape2d[1]
            height_scale = new_height / old_height
            width_scale = new_width / old_width
            # TODO: check the order of dimensions!
            scale = tf.stack(
                [width_scale, height_scale, width_scale, height_scale], axis=0)
            proposal_boxes = input_boxes * tf.cast(scale, tf.float32)

        secondclassification_labels = None
        if is_training:
            # sample proposal boxes in training
            rcnn_sampled_boxes, rcnn_labels, fg_inds_wrt_gt, bg_inds = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
            if config.USE_SECOND_HEAD:
                secondclassification_labels = tf.stop_gradient(
                    tf.concat([
                        tf.gather(second_gt_labels, fg_inds_wrt_gt),
                        tf.zeros_like(bg_inds, dtype=tf.int64)
                    ],
                              axis=0,
                              name='second_sampled_labels'))
            boxes_on_featuremap = rcnn_sampled_boxes * (1.0 /
                                                        config.ANCHOR_STRIDE)
        else:
            # use all proposal boxes in inference
            boxes_on_featuremap = proposal_boxes * (1.0 / config.ANCHOR_STRIDE)
            rcnn_labels = rcnn_sampled_boxes = fg_inds_wrt_gt = None

        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        def ff_true():
            feature_fastrcnn = resnet_conv5(
                roi_resized, config.RESNET_NUM_BLOCK[-1])  # nxcx7x7
            if config.TRAIN_HEADS_ONLY:
                feature_fastrcnn = tf.stop_gradient(feature_fastrcnn)
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(
                'fastrcnn', feature_fastrcnn, config.NUM_CLASS)
            return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

        def ff_false():
            ncls = config.NUM_CLASS
            return tf.zeros([0, 2048, 7,
                             7]), tf.zeros([0,
                                            ncls]), tf.zeros([0, ncls - 1, 4])

        feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
            tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)

        feature_fastrcnn_pooled = tf.reduce_mean(feature_fastrcnn, axis=[2, 3])

        if config.USE_SECOND_HEAD:

            def if_true():
                secondclassification_label_logits = secondclassification_head(
                    'secondclassification', feature_fastrcnn,
                    config.SECOND_NUM_CLASS)
                return secondclassification_label_logits

            def if_false():
                ncls = config.SECOND_NUM_CLASS
                return tf.zeros([0, ncls])

            secondclassification_label_logits = tf.cond(
                tf.size(boxes_on_featuremap) > 0, if_true, if_false)
        else:
            secondclassification_label_logits = None

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(anchor_labels,
                                                      anchor_boxes_encoded,
                                                      rpn_label_logits,
                                                      rpn_box_logits)

            # fastrcnn loss
            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0),
                                            [-1])  # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_sampled_boxes,
                                         fg_inds_wrt_sample)

            with tf.name_scope('fg_sample_patch_viz'):
                fg_sampled_patches = crop_and_resize(
                    image, fg_sampled_boxes,
                    tf.zeros_like(fg_inds_wrt_sample, dtype=tf.int32), 300)
                fg_sampled_patches = tf.transpose(fg_sampled_patches,
                                                  [0, 2, 3, 1])
                fg_sampled_patches = tf.reverse(fg_sampled_patches,
                                                axis=[-1])  # BGR->RGB
                tf.summary.image('viz', fg_sampled_patches, max_outputs=30)

            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)
            encoded_boxes = encode_bbox_target(
                matched_gt_boxes, fg_sampled_boxes) * tf.constant(
                    config.FASTRCNN_BBOX_REG_WEIGHTS)
            fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
                rcnn_labels, fastrcnn_label_logits, encoded_boxes,
                tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample))

            if config.USE_SECOND_HEAD:
                secondclassification_label_loss = secondclassification_losses(
                    secondclassification_labels,
                    secondclassification_label_logits)
            else:
                secondclassification_label_loss = None

            if config.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_head(
                    'maskrcnn', fg_feature,
                    config.NUM_CLASS)  # #fg x #cat x 14x14

                gt_masks_for_fg = tf.gather(gt_masks,
                                            fg_inds_wrt_gt)  # nfg x H x W
                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks_for_fg, 1), fg_sampled_boxes,
                    tf.range(tf.size(fg_inds_wrt_gt)), 14)  # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                                 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels,
                                           target_masks_for_fg)
            else:
                mrcnn_loss = 0.0

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(1e-4),
                name='wd_cost')

            if config.TRAIN_HEADS_ONLY:
                # don't include the rpn loss
                if config.USE_SECOND_HEAD:
                    self.cost = tf.add_n([
                        fastrcnn_label_loss, fastrcnn_box_loss,
                        secondclassification_label_loss, mrcnn_loss, wd_cost
                    ], 'total_cost')
                else:
                    self.cost = tf.add_n([
                        fastrcnn_label_loss, fastrcnn_box_loss, mrcnn_loss,
                        wd_cost
                    ], 'total_cost')
            else:
                if config.USE_SECOND_HEAD:
                    self.cost = tf.add_n([
                        rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
                        fastrcnn_box_loss, secondclassification_label_loss,
                        mrcnn_loss, wd_cost
                    ], 'total_cost')
                else:
                    self.cost = tf.add_n([
                        rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
                        fastrcnn_box_loss, mrcnn_loss, wd_cost
                    ], 'total_cost')

            add_moving_summary(self.cost, wd_cost)
        else:
            label_probs = tf.nn.softmax(
                fastrcnn_label_logits,
                name='fastrcnn_all_probs')  # #proposal x #Class
            anchors = tf.tile(
                tf.expand_dims(proposal_boxes, 1),
                [1, config.NUM_CLASS - 1, 1])  # #proposal x #Cat x 4
            decoded_boxes = decode_bbox_target(
                fastrcnn_box_logits /
                tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS), anchors)
            decoded_boxes = clip_boxes(decoded_boxes,
                                       image_shape2d,
                                       name='fastrcnn_all_boxes')

            # indices: Nx2. Each index into (#proposal, #category)
            pred_indices, final_probs = fastrcnn_predictions(
                decoded_boxes, label_probs)
            final_probs = tf.identity(final_probs, 'final_probs')
            final_boxes = tf.gather_nd(decoded_boxes,
                                       pred_indices,
                                       name='final_boxes')

            pred_indices_only_boxes = pred_indices[:,
                                                   0]  # bugfix (?): changing this 1->0 fixes quite some issues. Why?
            final_labels = tf.add(pred_indices[:, 1], 1,
                                  name='final_labels')  # pred_indices[:, 1]
            final_posterior = tf.gather(label_probs,
                                        pred_indices_only_boxes,
                                        name='final_posterior')

            feature_fastrcnn_pooled = tf.gather(feature_fastrcnn_pooled,
                                                pred_indices_only_boxes,
                                                name="feature_fastrcnn_pooled")

            if config.USE_SECOND_HEAD:
                second_label_probs = tf.nn.softmax(
                    secondclassification_label_logits,
                    name='secondclassification_all_probs'
                )  # #proposal x #Class
                second_final_posterior = tf.gather(
                    second_label_probs,
                    pred_indices_only_boxes,
                    name='second_final_posterior')
                second_final_labels = tf.add(tf.argmax(second_final_posterior,
                                                       -1),
                                             1,
                                             name='second_final_labels')
            if config.MODE_MASK:
                # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
                def f1():
                    roi_resized = roi_align(
                        featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE),
                        14)
                    feature_maskrcnn = resnet_conv5(
                        roi_resized, config.RESNET_NUM_BLOCK[-1])
                    mask_logits = maskrcnn_head(
                        'maskrcnn', feature_maskrcnn,
                        config.NUM_CLASS)  # #result x #cat x 14x14
                    indices = tf.stack([
                        tf.range(tf.size(final_labels)),
                        tf.to_int32(final_labels) - 1
                    ],
                                       axis=1)
                    final_mask_logits = tf.gather_nd(mask_logits,
                                                     indices)  # #resultx14x14
                    return tf.sigmoid(final_mask_logits)

                final_masks = tf.cond(
                    tf.size(final_probs) > 0, f1,
                    lambda: tf.zeros([0, 14, 14]))
                tf.identity(final_masks, name='final_masks')

Ejemplo n.º 7

Mostrar archivo

Archivo: demo.py Proyecto: coderSkyChen/object-detection-zoo

        if args.TEST_BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if args.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(args.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(args.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(args.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(args.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im2show = np.copy(im)
        for j in xrange(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)