Ejemplo n.º 1
0
    def forward(self, feed_dict):
        """Run the backbone, the RPN and ``self.num_stages`` RCNN stages.

        Args:
            feed_dict: mapping of network inputs. ``constants.KEY_IMAGE`` is
                read here; the backbone output is written back into it under
                ``'base_feat'`` before the RPN is called.

        Returns:
            In training mode the tuple ``(losses, stats)``; otherwise the
            ``instance`` produced by ``self.instance_info.generate_instance``
            in the last stage.

        NOTE(review): ``stats`` is never assigned inside this method, so the
        training-mode return raises ``NameError`` unless a module-level
        ``stats`` exists -- confirm and fix.
        NOTE(review): ``losses`` is rebound on every stage, so only the last
        stage's losses reach the return value -- confirm this is intended.
        """
        # prediction_dict = {}
        output_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict[constants.KEY_IMAGE])
        # expose the backbone features to the RPN through feed_dict
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model
        output_dict.update(self.rpn_model.forward(feed_dict))
        proposals = output_dict['proposals']
        # NOTE(review): never read below -- looks like a leftover
        multi_stage_loss_units = []
        for i in range(self.num_stages):

            if self.training:
                auxiliary_dict = {}
                auxiliary_dict[constants.KEY_PROPOSALS] = proposals

                # proposals_dict, loss_units = self.instance.target_generators[
                # i].generate_targets(output_dict, feed_dict, auxiliary_dict)
                losses = self.instance_info.generate_losses(
                    output_dict, feed_dict, auxiliary_dict)

                # subsample the losses, then apply the same mask to the
                # proposals so both stay aligned
                losses, subsampled_mask = self.sampler.subsample_losses(losses)
                proposals, _ = self.sampler.subsample_outputs(
                    proposals, subsampled_mask)

                # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
                # proposals = proposals_dict[constants.KEY_PRIMARY]
            rois = box_ops.box2rois(proposals)
            # rois are flattened to rows of 5 values before pooling
            pooled_feat = self.rcnn_pooling(base_feat, rois.view(-1, 5))

            # shape(N,C,1,1)
            pooled_feat = self.feature_extractor.second_stage_feature(
                pooled_feat)
            # average over dim 3 and then dim 2, leaving a 2-D tensor
            pooled_feat = pooled_feat.mean(3).mean(2)

            # rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
            # rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)

            # run the i-th head of every branch; results are stored under the
            # branch name and overwrite the previous stage's entry
            for attr_name in self.branches:
                attr_preds = self.branches[attr_name][i](pooled_feat)
                output_dict[attr_name] = attr_preds

            # rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
            # batch_size = rois.shape[0]
            # rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
            # self.n_classes)
            # rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
            # self.n_classes)
            # rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
            # output_dict.update({constants.KEY_})

            if self.training:
                losses.update_from_output(output_dict)

            # decode
            instance = self.instance_info.generate_instance(output_dict)

            # decode for next stage
            # coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
            # proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

        if self.training:
            # prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
            # NOTE(review): `stats` is unbound here (see docstring)
            return losses, stats
        else:
            return instance
Ejemplo n.º 2
0
    def forward(self, feed_dict):
        """Run the RPN and the multi-stage RCNN heads (boxes, corners, dims).

        Every stage pools features for the current proposals and predicts
        class scores, 2D box deltas, encoded corners and dimensions. While
        training, the predictions are attached to the loss units returned by
        the stage's target generator. In evaluation mode they are decoded and
        the decoded boxes become the proposals of the next stage.

        Args:
            feed_dict: input mapping. Reads ``constants.KEY_IMAGE`` and
                ``constants.KEY_IMAGE_INFO``; evaluation additionally reads
                ``constants.KEY_STEREO_CALIB_P2`` and
                ``constants.KEY_MEAN_DIMS``, training reads the label entries
                forwarded to the target generators. The RPN feature maps are
                written back under ``'rpn_feat_maps'``.

        Returns:
            Training mode: ``(prediction_dict, loss_dict)``, where
            ``prediction_dict`` holds the per-stage loss units, the per-stage
            stats and the last-stage proposals, and ``loss_dict`` is the
            result of ``self.loss``.
            Evaluation mode: ``prediction_dict`` with the class
            probabilities, the decoded 2D boxes, the corners reshaped to
            ``(N, M, 8, 2)`` and the decoded dims. Boxes and corners are
            divided by the per-image factors stored in columns 3 and 2 of
            the image info.
        """
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        prediction_dict = {}

        # base model
        rpn_feat_maps, rcnn_feat_maps = (
            self.feature_extractor.first_stage_feature(
                feed_dict[constants.KEY_IMAGE]))
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']
        multi_stage_loss_units = []
        multi_stage_stats = []
        for i in range(self.num_stages):

            if self.training:
                proposals_dict = {constants.KEY_PRIMARY: proposals}

                # Only the primary entry carries labels; the other keys are
                # passed as None placeholders.
                gt_dict = {
                    constants.KEY_PRIMARY:
                    feed_dict[constants.KEY_LABEL_BOXES_2D],
                    constants.KEY_CLASSES: None,
                    constants.KEY_BOXES_2D: None,
                    constants.KEY_CORNERS_3D_GRNET: None,
                    constants.KEY_DIMS: None,
                }

                # auxiliary_dict (used for encoding)
                auxiliary_dict = {
                    constants.KEY_STEREO_CALIB_P2:
                    feed_dict[constants.KEY_STEREO_CALIB_P2],
                    constants.KEY_BOXES_2D:
                    feed_dict[constants.KEY_LABEL_BOXES_2D],
                    constants.KEY_CLASSES:
                    feed_dict[constants.KEY_LABEL_CLASSES],
                    constants.KEY_BOXES_3D:
                    feed_dict[constants.KEY_LABEL_BOXES_3D],
                    constants.KEY_NUM_INSTANCES:
                    feed_dict[constants.KEY_NUM_INSTANCES],
                    constants.KEY_PROPOSALS: proposals,
                    constants.KEY_MEAN_DIMS:
                    feed_dict[constants.KEY_MEAN_DIMS],
                    constants.KEY_IMAGE_INFO:
                    feed_dict[constants.KEY_IMAGE_INFO],
                }

                proposals_dict, loss_units, stats = self.target_generators[
                    i].generate_targets(proposals_dict, gt_dict,
                                        auxiliary_dict)

                # continue with the proposals returned by the generator
                proposals = proposals_dict[constants.KEY_PRIMARY]

            rois = box_ops.box2rois(proposals)
            pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                    rois.view(-1, 5),
                                                    im_info[0][:2])

            # second-stage features, averaged over dim 3 and then dim 2
            pooled_feat = self.feature_extractor.second_stage_feature(
                pooled_feat)
            pooled_feat = pooled_feat.mean(3).mean(2)

            rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
            rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)
            rcnn_corners_preds = self.rcnn_corners_preds[i](pooled_feat)
            rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat)

            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            batch_size = rois.shape[0]
            rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                                   self.n_classes)
            rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                                 self.n_classes)

            # Class-specific heads: keep the prediction of one class per roi,
            # the target class while training and the argmax class otherwise.
            if not self.class_agnostic:
                if self.training:
                    cls_index = loss_units[constants.KEY_CLASSES]['target']
                else:
                    cls_index = rcnn_cls_probs.argmax(dim=-1)
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds, cls_index.view(-1))

            # not class_agnostic for dims
            if not self.class_agnostic_3d:
                if self.training:
                    cls_index = loss_units[constants.KEY_CLASSES]['target']
                else:
                    cls_index = rcnn_cls_probs.argmax(dim=-1)
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds, cls_index.view(-1), out_c=3)

            rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
            rcnn_corners_preds = rcnn_corners_preds.view(
                batch_size, rcnn_bbox_preds.shape[1], -1)
            rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

            if self.training:
                loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
                loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
                loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
                loss_units[constants.KEY_CORNERS_3D_GRNET][
                    'pred'] = rcnn_corners_preds
                multi_stage_loss_units.append([
                    loss_units[constants.KEY_CLASSES],
                    loss_units[constants.KEY_BOXES_2D],
                    loss_units[constants.KEY_CORNERS_3D_GRNET],
                    loss_units[constants.KEY_DIMS]
                ])
                multi_stage_stats.append(stats)
            else:
                # Decode for the next stage. Corners are decoded before
                # `proposals` is overwritten, i.e. against the boxes this
                # stage pooled from.
                corners_coder = bbox_coders.build({
                    'type':
                    constants.KEY_CORNERS_3D_GRNET
                })
                rcnn_corners_preds = corners_coder.decode_batch(
                    rcnn_corners_preds.detach(), proposals,
                    feed_dict[constants.KEY_STEREO_CALIB_P2])
                boxes_coder = bbox_coders.build(
                    self.target_generators[i]
                    .target_generator_config['coder_config'])
                proposals = boxes_coder.decode_batch(rcnn_bbox_preds,
                                                     proposals).detach()
                dims_coder = bbox_coders.build({'type': constants.KEY_DIMS})
                rcnn_dim_preds = dims_coder.decode_batch(
                    rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
                    rcnn_cls_probs).detach()

        if self.training:
            prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
            prediction_dict[constants.KEY_STATS] = multi_stage_stats
            prediction_dict[constants.KEY_PROPOSALS] = proposals
            loss_dict = self.loss(prediction_dict, feed_dict)
            return prediction_dict, loss_dict

        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

        # Per-image factors reshaped to (N, 1, 1) so that they broadcast over
        # the proposal axis and the coordinate axis of every image.
        # (presumably the x / y resize factors -- verify against the loader)
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        x_scale = image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
        y_scale = image_info[:, 2].unsqueeze(-1).unsqueeze(-1)

        proposals[:, :, ::2] = proposals[:, :, ::2] / x_scale
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / y_scale

        N, M = rcnn_corners_preds.shape[:2]
        rcnn_corners_preds = rcnn_corners_preds.view(N, M, 8, 2)
        # The (N, M, 8) coordinate slices must be divided by (N, 1, 1)
        # factors; a bare (N,) vector would be matched against the last axis
        # and is only correct for a batch of one image.
        rcnn_corners_preds[:, :, :, 0] = (
            rcnn_corners_preds[:, :, :, 0] / x_scale)
        rcnn_corners_preds[:, :, :, 1] = (
            rcnn_corners_preds[:, :, :, 1] / y_scale)

        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_BOXES_2D] = proposals
        prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds
        return prediction_dict
Ejemplo n.º 3
0
    def forward(self, feed_dict):
        """Forward pass: backbone, RPN, then cascaded RCNN stages.

        Each stage pools features for the current proposals and predicts
        class scores, 2D box deltas, 2D corners and dimensions. After every
        stage, in training as well as in evaluation, the predictions are
        decoded and the decoded boxes become the next stage's proposals.

        Args:
            feed_dict: input mapping. ``constants.KEY_IMAGE``,
                ``constants.KEY_IMAGE_INFO`` and ``constants.KEY_MEAN_DIMS``
                are always read; the label entries are read in training mode
                only. The RPN feature maps are stored back under
                ``'rpn_feat_maps'``.

        Returns:
            ``(prediction_dict, loss_dict)`` while training, with the
            per-stage loss units and stats stored in ``prediction_dict``;
            otherwise only ``prediction_dict`` with class probabilities,
            rescaled corners and boxes, and decoded dims.
        """
        prediction_dict = {}
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        # backbone: one set of maps feeds the RPN, the other the RCNN pooling
        rpn_feat_maps, rcnn_feat_maps = (
            self.feature_extractor.first_stage_feature(
                feed_dict[constants.KEY_IMAGE]))
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # region proposals
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']

        all_units = []
        all_stats = []
        for stage in range(self.num_stages):
            if self.training:
                # ground truth: only the primary entry is filled in
                gt_dict = {
                    constants.KEY_PRIMARY:
                    feed_dict[constants.KEY_LABEL_BOXES_2D],
                    constants.KEY_CLASSES: None,
                    constants.KEY_BOXES_2D: None,
                    constants.KEY_CORNERS_2D: None,
                    constants.KEY_DIMS: None,
                }
                # extra inputs the target generator uses for encoding
                auxiliary_dict = {
                    constants.KEY_STEREO_CALIB_P2:
                    feed_dict[constants.KEY_STEREO_CALIB_P2],
                    constants.KEY_BOXES_2D:
                    feed_dict[constants.KEY_LABEL_BOXES_2D],
                    constants.KEY_CLASSES:
                    feed_dict[constants.KEY_LABEL_CLASSES],
                    constants.KEY_BOXES_3D:
                    feed_dict[constants.KEY_LABEL_BOXES_3D],
                    constants.KEY_NUM_INSTANCES:
                    feed_dict[constants.KEY_NUM_INSTANCES],
                    constants.KEY_PROPOSALS: proposals,
                    constants.KEY_MEAN_DIMS:
                    feed_dict[constants.KEY_MEAN_DIMS],
                    constants.KEY_IMAGE_INFO:
                    feed_dict[constants.KEY_IMAGE_INFO],
                }

                target_dict, units, stage_stats = self.target_generators[
                    stage].generate_targets(
                        {constants.KEY_PRIMARY: proposals}, gt_dict,
                        auxiliary_dict)

                # carry on with the proposals returned by the generator
                proposals = target_dict[constants.KEY_PRIMARY]

            roi_batch = box_ops.box2rois(proposals)
            roi_feat = self.pyramid_rcnn_pooling(
                rcnn_feat_maps, roi_batch.view(-1, 5), im_info[0][:2])

            # second-stage features, averaged over dim 3 and then dim 2
            head_feat = self.feature_extractor.second_stage_feature(roi_feat)
            head_feat = head_feat.mean(3).mean(2)

            bbox_deltas = self.rcnn_bbox_preds[stage](head_feat)
            cls_scores = self.rcnn_cls_preds[stage](head_feat)
            corners = self.rcnn_corners_preds[stage](head_feat)
            dims = self.rcnn_dim_preds[stage](head_feat)

            cls_probs = F.softmax(cls_scores, dim=1)

            num_images = roi_batch.shape[0]
            cls_scores = cls_scores.view(num_images, -1, self.n_classes)
            cls_probs = cls_probs.view(num_images, -1, self.n_classes)

            # class-specific boxes: pick one class per roi (target class in
            # training, most probable class otherwise)
            if not self.class_agnostic:
                picked = (units[constants.KEY_CLASSES]['target']
                          if self.training else cls_probs.argmax(dim=-1))
                bbox_deltas = self.squeeze_bbox_preds(bbox_deltas,
                                                      picked.view(-1))

            # same selection for class-specific dims
            if not self.class_agnostic_3d:
                picked = (units[constants.KEY_CLASSES]['target']
                          if self.training else cls_probs.argmax(dim=-1))
                dims = self.squeeze_bbox_preds(
                    dims, picked.view(-1), out_c=3)

            bbox_deltas = bbox_deltas.view(num_images, -1, 4)
            corners = corners.view(num_images, bbox_deltas.shape[1], -1)
            dims = dims.view(num_images, -1, 3)

            if self.training:
                units[constants.KEY_CLASSES]['pred'] = cls_scores
                units[constants.KEY_BOXES_2D]['pred'] = bbox_deltas
                units[constants.KEY_DIMS]['pred'] = dims
                units[constants.KEY_CORNERS_2D]['pred'] = corners
                all_units.append([
                    units[constants.KEY_CLASSES],
                    units[constants.KEY_BOXES_2D],
                    units[constants.KEY_CORNERS_2D],
                    units[constants.KEY_DIMS],
                ])
                all_stats.append(stage_stats)

            # decode for the next stage; corners first, while `proposals`
            # still holds the boxes this stage pooled from
            corners_coder = bbox_coders.build(
                {'type': constants.KEY_CORNERS_2D_STABLE})
            corners = corners_coder.decode_batch(corners.detach(), proposals)

            boxes_coder = bbox_coders.build(
                self.target_generators[stage]
                .target_generator_config['coder_config'])
            proposals = boxes_coder.decode_batch(bbox_deltas,
                                                 proposals).detach()

            dims_coder = bbox_coders.build({'type': constants.KEY_DIMS})
            dims = dims_coder.decode_batch(
                dims, feed_dict[constants.KEY_MEAN_DIMS],
                cls_probs).detach()

        if self.training:
            prediction_dict[constants.KEY_TARGETS] = all_units
            prediction_dict[constants.KEY_STATS] = all_stats
            return prediction_dict, self.loss(prediction_dict, feed_dict)

        prediction_dict[constants.KEY_CLASSES] = cls_probs

        # image info gains two singleton axes so that its columns broadcast
        # against the (image, proposal, coordinate) layout of the boxes
        scale = feed_dict[constants.KEY_IMAGE_INFO].unsqueeze(1).unsqueeze(1)
        proposals[:, :, ::2] = proposals[:, :, ::2] / scale[..., 3]
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / scale[..., 2]

        # NOTE(review): the extra unsqueeze gives the divisor one more axis
        # than for the boxes -- confirm the decoded corner layout for
        # batches with more than one image.
        corners[..., 0] = corners[..., 0] / scale[..., 3].unsqueeze(-1)
        corners[..., 1] = corners[..., 1] / scale[..., 2].unsqueeze(-1)

        prediction_dict[constants.KEY_CORNERS_2D] = corners
        prediction_dict[constants.KEY_BOXES_2D] = proposals
        prediction_dict[constants.KEY_DIMS] = dims
        return prediction_dict
Ejemplo n.º 4
0
    def forward(self, feed_dict):
        """Forward pass with box, orientation-bin and dimension heads.

        Runs the backbone and the RPN, then ``self.num_stages`` RCNN stages.
        After every stage, in both modes, boxes, dims and orientations are
        decoded and the decoded boxes replace the proposals.

        Args:
            feed_dict: input mapping. ``constants.KEY_IMAGE``,
                ``constants.KEY_IMAGE_INFO``, ``constants.KEY_MEAN_DIMS`` and
                ``constants.KEY_STEREO_CALIB_P2`` are always read; the label
                entries are read in training mode only. The RPN feature maps
                are stored back under ``'rpn_feat_maps'``.

        Returns:
            ``prediction_dict``. In training mode it carries the per-stage
            loss units and stats; otherwise the class probabilities, decoded
            orientations, rescaled 2D boxes and decoded dims.
        """
        prediction_dict = {}
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        # backbone: one set of maps feeds the RPN, the other the RCNN pooling
        rpn_feat_maps, rcnn_feat_maps = (
            self.feature_extractor.first_stage_feature(
                feed_dict[constants.KEY_IMAGE]))
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # region proposals
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']

        all_units = []
        all_stats = []
        for stage in range(self.num_stages):
            if self.training:
                # ground truth: only the primary entry is filled in
                gt_dict = {
                    constants.KEY_PRIMARY:
                    feed_dict[constants.KEY_LABEL_BOXES_2D],
                    constants.KEY_CLASSES: None,
                    constants.KEY_BOXES_2D: None,
                    constants.KEY_ORIENTS_V3: None,
                    constants.KEY_DIMS: None,
                }
                # extra inputs the target generator uses for encoding
                auxiliary_dict = {
                    constants.KEY_STEREO_CALIB_P2:
                    feed_dict[constants.KEY_STEREO_CALIB_P2],
                    constants.KEY_BOXES_2D:
                    feed_dict[constants.KEY_LABEL_BOXES_2D],
                    constants.KEY_CLASSES:
                    feed_dict[constants.KEY_LABEL_CLASSES],
                    constants.KEY_BOXES_3D:
                    feed_dict[constants.KEY_LABEL_BOXES_3D],
                    constants.KEY_NUM_INSTANCES:
                    feed_dict[constants.KEY_NUM_INSTANCES],
                    constants.KEY_PROPOSALS: proposals,
                    constants.KEY_MEAN_DIMS:
                    feed_dict[constants.KEY_MEAN_DIMS],
                }

                target_dict, units, stage_stats = self.target_generators[
                    stage].generate_targets(
                        {constants.KEY_PRIMARY: proposals}, gt_dict,
                        auxiliary_dict)

                # carry on with the proposals returned by the generator
                proposals = target_dict[constants.KEY_PRIMARY]

            roi_batch = box_ops.box2rois(proposals)
            roi_feat = self.pyramid_rcnn_pooling(
                rcnn_feat_maps, roi_batch.view(-1, 5), im_info[0][:2])

            # second-stage features, averaged over dim 3 and then dim 2
            roi_feat = self.feature_extractor.second_stage_feature(roi_feat)
            roi_feat = roi_feat.mean(3).mean(2)

            bbox_deltas = self.rcnn_bbox_preds[stage](roi_feat)
            cls_scores = self.rcnn_cls_preds[stage](roi_feat)
            orients = self.rcnn_orient_preds[stage](roi_feat)
            dims = self.rcnn_dim_preds[stage](roi_feat)

            cls_probs = F.softmax(cls_scores, dim=1)

            num_images = roi_batch.shape[0]
            cls_scores = cls_scores.view(num_images, -1, self.n_classes)
            cls_probs = cls_probs.view(num_images, -1, self.n_classes)

            # class-specific boxes: pick one class per roi (target class in
            # training, most probable class otherwise)
            if not self.class_agnostic:
                picked = (units[constants.KEY_CLASSES]['target']
                          if self.training else cls_probs.argmax(dim=-1))
                bbox_deltas = self.squeeze_bbox_preds(bbox_deltas,
                                                      picked.view(-1))

            bbox_deltas = bbox_deltas.view(num_images, -1, 4)
            orients = orients.view(num_images, -1, 4 * self.num_bins)
            dims = dims.view(num_images, -1, 3)

            if self.training:
                units[constants.KEY_CLASSES]['pred'] = cls_scores
                units[constants.KEY_BOXES_2D]['pred'] = bbox_deltas
                units[constants.KEY_ORIENTS_V3]['pred'] = orients
                units[constants.KEY_DIMS]['pred'] = dims
                all_units.append([
                    units[constants.KEY_CLASSES],
                    units[constants.KEY_BOXES_2D],
                    units[constants.KEY_ORIENTS_V3],
                    units[constants.KEY_DIMS],
                ])
                all_stats.append(stage_stats)

            # decode for the next stage: boxes, then dims, then orientations
            boxes_coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
            proposals = boxes_coder.decode_batch(bbox_deltas,
                                                 proposals).detach()

            dims_coder = bbox_coders.build({'type': constants.KEY_DIMS})
            dims = dims_coder.decode_batch(
                dims, feed_dict[constants.KEY_MEAN_DIMS],
                cls_probs).detach()

            # orientations are decoded against the boxes refined just above
            orients_coder = bbox_coders.build(
                {'type': constants.KEY_ORIENTS_V3})
            orients = orients_coder.decode_batch(
                orients, self.rcnn_orient_loss.bin_centers, proposals,
                feed_dict[constants.KEY_STEREO_CALIB_P2]).detach()

        if self.training:
            prediction_dict[constants.KEY_TARGETS] = all_units
            prediction_dict[constants.KEY_STATS] = all_stats
            return prediction_dict

        prediction_dict[constants.KEY_CLASSES] = cls_probs
        prediction_dict[constants.KEY_ORIENTS_V3] = orients

        # divide the even / odd box coordinates by columns 3 / 2 of the
        # image info, reshaped to broadcast per image
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        even_div = image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
        odd_div = image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
        proposals[:, :, ::2] = proposals[:, :, ::2] / even_div
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / odd_div

        prediction_dict[constants.KEY_BOXES_2D] = proposals
        prediction_dict[constants.KEY_DIMS] = dims
        return prediction_dict
    def forward(self, feed_dict):
        """Forward pass driven by ``self.instance_info`` and ``self.branches``.

        Runs the backbone and the RPN, then ``self.num_stages`` stages. Each
        stage pools features for the current proposals, evaluates the stage's
        head of every branch and lets ``self.instance_info`` turn the raw
        outputs into an instance.

        Args:
            feed_dict: input mapping. ``constants.KEY_IMAGE`` and
                ``constants.KEY_IMAGE_INFO`` are always read,
                ``constants.KEY_STEREO_CALIB_P2`` is forwarded when present,
                and ``constants.KEY_NUM_INSTANCES`` is read in training mode.
                The RPN feature maps are stored back under ``'base_feat'``.

        Returns:
            The tuple ``(instance, losses_dict, multi_stage_stats)``. In
            evaluation mode the last two entries are ``None`` and the
            instance has been passed through
            ``self.instance_info.affine_transform``.
        """
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        auxiliary_dict = {}
        output_dict = {}

        # TODO move all auxiliary item from feed_dict to auxiliary_dict
        # before get data from dataloader
        if feed_dict.get(constants.KEY_STEREO_CALIB_P2) is not None:
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                constants.KEY_STEREO_CALIB_P2]

        # backbone: one set of maps feeds the RPN, the other the RCNN pooling
        rpn_feat_maps, rcnn_feat_maps = (
            self.feature_extractor.first_stage_feature(
                feed_dict[constants.KEY_IMAGE]))
        feed_dict.update({'base_feat': rpn_feat_maps})

        # the RPN returns its instance together with its own losses
        rpn_instance, rpn_losses = self.rpn_model.forward(feed_dict)
        instance = rpn_instance

        auxiliary_dict.update(rpn_instance)
        proposals = auxiliary_dict[constants.KEY_BOXES_2D]

        multi_stage_stats = []
        for stage in range(self.num_stages):
            if self.training:
                losses_units = self.instance_info.generate_losses(
                    output_dict, feed_dict, auxiliary_dict)

                # subsample the loss units, then reuse the mask so that the
                # proposals stay aligned with them
                losses_units, keep_mask = self.sampler.subsample_instance(
                    losses_units)
                proposals, _ = self.sampler.subsample_instance(
                    proposals, keep_mask)

                # TODO subsample for all auxiliary_dict
                auxiliary_dict[constants.KEY_BOXES_2D] = proposals
                auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                    constants.KEY_NUM_INSTANCES]

                stage_stats = self.instance_info.generate_stats(
                    auxiliary_dict)
                multi_stage_stats.append(stage_stats)

            roi_batch = box_ops.box2rois(proposals)
            roi_feat = self.pyramid_rcnn_pooling(
                rcnn_feat_maps, roi_batch.view(-1, 5), im_info[0][:2])

            # second-stage features, averaged over dim 3 and then dim 2
            roi_feat = self.feature_extractor.second_stage_feature(roi_feat)
            roi_feat = roi_feat.mean(3).mean(2)

            # raw network outputs, one entry per branch name
            for attr_name in self.branches:
                stage_head = self.branches[attr_name][stage]
                output_dict[attr_name] = stage_head(roi_feat)

            # restore the batch axis before the losses read the outputs
            output_dict = self.instance_info.unsqueeze(
                output_dict, roi_batch.shape[0])
            if self.training:
                losses_units.update_from_output(output_dict)

            # decode
            instance = self.instance_info.generate_instance(
                output_dict, auxiliary_dict)

        if not self.training:
            # rescale with the image info; no losses or stats in this mode
            rescaled = self.instance_info.affine_transform(
                instance, feed_dict[constants.KEY_IMAGE_INFO])
            return rescaled, None, None

        losses_dict = {}
        losses_dict.update(rpn_losses)
        losses_dict.update(self.instance_info.calc_loss(losses_units))
        return instance, losses_dict, multi_stage_stats
Ejemplo n.º 6
0
    def forward(self, feed_dict):
        """Run the detector forward pass with box, corner, dims and keypoint heads.

        The first-stage extractor returns two sets of feature maps: one is
        written into ``feed_dict`` under ``'rpn_feat_maps'`` (``feed_dict`` is
        mutated) for the RPN, the other is used for ROI pooling. For each of
        ``self.num_stages`` stages the proposals are pooled and passed to the
        per-stage class, 2D-box, corner and dims heads and to the keypoint
        predictor. After the loop the corner predictions are decoded with the
        ``constants.KEY_MOBILEYE`` coder.

        Args:
            feed_dict: mapping indexed by ``constants`` keys. ``KEY_IMAGE``,
                ``KEY_IMAGE_INFO`` and ``KEY_MEAN_DIMS`` are always read; in
                training mode the label and calibration entries copied into
                ``auxiliary_dict`` below are read as well.

        Returns:
            ``(prediction_dict, loss_dict)`` when ``self.training`` is true,
            where ``loss_dict`` is ``self.loss(prediction_dict, feed_dict)``;
            otherwise ``prediction_dict`` alone.
        """
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        prediction_dict = {}

        # base model
        rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
            feed_dict[constants.KEY_IMAGE])
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']
        multi_stage_loss_units = []
        multi_stage_stats = []
        # NOTE(review): outside the training branch `proposals` is never
        # reassigned inside this loop, so in eval mode every stage pools from
        # the same RPN boxes - confirm this is intended when num_stages > 1.
        for i in range(self.num_stages):

            if self.training:
                # proposals_dict
                proposals_dict = {}
                proposals_dict[constants.KEY_PRIMARY] = proposals

                # gt_dict
                gt_dict = {}
                gt_dict[constants.KEY_PRIMARY] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                gt_dict[constants.KEY_CLASSES] = None
                gt_dict[constants.KEY_BOXES_2D] = None
                gt_dict[constants.KEY_MOBILEYE] = None
                gt_dict[constants.KEY_DIMS] = None

                # auxiliary_dict(used for encoding)
                auxiliary_dict = {}
                auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                    constants.KEY_STEREO_CALIB_P2]
                auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                    constants.KEY_LABEL_CLASSES]
                auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                    constants.KEY_LABEL_BOXES_3D]
                auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                    constants.KEY_NUM_INSTANCES]
                auxiliary_dict[constants.KEY_PROPOSALS] = proposals
                auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                    constants.KEY_MEAN_DIMS]
                auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[
                    constants.KEY_IMAGE_INFO]

                # The generator returns a new proposals dict, the per-target
                # loss units (each later given a 'pred' entry) and stats.
                proposals_dict, loss_units, stats = self.target_generators[
                    i].generate_targets(proposals_dict, gt_dict,
                                        auxiliary_dict)

                # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
                proposals = proposals_dict[constants.KEY_PRIMARY]
            rois = box_ops.box2rois(proposals)
            # Only the first image's info entry is handed to the pooling
            # layer (presumably the image height/width - TODO confirm).
            pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                    rois.view(-1, 5),
                                                    im_info[0][:2])

            # shape(N,C,1,1)
            pooled_feat_for_corners = self.feature_extractor.second_stage_feature(
                pooled_feat)
            # pooled_feat_for_keypoint = F.upsample_bilinear(
            # pooled_feat_for_corners, size=(14, 14))
            # The keypoint head consumes the raw pooled features, not the
            # second-stage features; its output is averaged over dim -2.
            keypoint_map = self.keypoint_predictor(pooled_feat)
            # keypoint_map = self.rcnn_keypoint_preds(pooled_feat_for_keypoint)
            keypoint_map = keypoint_map.mean(-2)
            # keypoint_map = F.softmax(keypoint_map, dim=-1)

            # Average over dims 3 and 2 to get one feature vector per ROI.
            pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

            rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
            rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
            rcnn_corners_preds = self.rcnn_corners_preds[i](
                pooled_feat_for_corners)
            rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            batch_size = rois.shape[0]
            rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                                   self.n_classes)
            rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                                 self.n_classes)
            if not self.class_agnostic:
                # import ipdb
                # ipdb.set_trace()
                # Pick the box regression for one class per ROI: the target
                # class while training, the argmax class at inference.
                if self.training:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        loss_units[constants.KEY_CLASSES]['target'].view(-1))

                else:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        rcnn_cls_probs.argmax(dim=-1).view(-1))

            # not class_agnostic for dims
            # import ipdb
            # ipdb.set_trace()
            if not self.class_agnostic_3d:
                if self.training:
                    rcnn_dim_preds = self.squeeze_bbox_preds(
                        rcnn_dim_preds,
                        loss_units[constants.KEY_CLASSES]['target'].view(-1),
                        out_c=3)
                else:
                    rcnn_dim_preds = self.squeeze_bbox_preds(
                        rcnn_dim_preds,
                        rcnn_cls_probs.argmax(dim=-1).view(-1),
                        out_c=3)

            rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
            rcnn_corners_preds = rcnn_corners_preds.view(
                batch_size, rcnn_bbox_preds.shape[1], -1)

            # rcnn_depth_preds = rcnn_depth_preds.view(
            # batch_size, rcnn_bbox_preds.shape[1], -1)
            # rcnn_center_depth_preds = rcnn_center_depth_preds.view(
            # batch_size, rcnn_bbox_preds.shape[1], -1)
            # concat them(depth and corners)
            # rcnn_corners_preds = torch.cat(
            # [rcnn_corners_preds, rcnn_depth_preds], dim=-1)

            # # append center depth
            # rcnn_corners_preds = torch.cat(
            # [rcnn_corners_preds, rcnn_center_depth_preds], dim=-1)

            # rcnn_visibility_preds = rcnn_visibility_preds.view(
            # batch_size, rcnn_bbox_preds.shape[1], -1)
            rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

            # decode for next stage

            # NOTE(review): the decoded dims are detached here and the same
            # tensor is stored as the dims loss 'pred' below - confirm that
            # the dims loss is meant to work on decoded, detached values.
            coder = bbox_coders.build({'type': constants.KEY_DIMS})
            rcnn_dim_preds = coder.decode_batch(
                rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
                rcnn_cls_probs).detach()

            # rcnn_corners_preds = coder.decode_batch(
            # rcnn_corners_preds.detach(), proposals)

            # import ipdb
            # ipdb.set_trace()
            # if self.training_depth:
            # # predict for depth
            # rois = box_ops.box2rois(proposals)
            # pooled_feat_for_depth = self.pyramid_rcnn_pooling(
            # rcnn_feat_maps, rois.view(-1, 5), im_info[0][:2])

            # shape(N,C,1,1)
            # pooled_feat_for_depth = self.third_stage_feature(pooled_feat)
            # pooled_feat_for_depth = pooled_feat_for_depth.mean(3).mean(2)
            # rcnn_depth_preds = self.rcnn_depth_preds[i](pooled_feat_for_depth)

            # encode
            # rcnn_depth_preds = 1 / (rcnn_depth_preds.sigmoid() + 1e-6) - 1
            # rcnn_depth_preds = rcnn_depth_preds.view(
            # batch_size, rcnn_bbox_preds.shape[1], -1)

            # # concat them(depth and corners)
            # rcnn_corners_preds = self.fuse_corners_and_depth(
            # rcnn_corners_preds, rcnn_depth_preds)
            # rcnn_corners_preds = torch.cat(
            # [rcnn_corners_preds, rcnn_depth_preds], dim=-1)

            # # # append center depth
            # rcnn_corners_preds = torch.cat(
            # [rcnn_corners_preds, rcnn_center_depth_preds], dim=-1)

            if self.training:
                # Attach this stage's predictions to the matching loss units;
                # one list of four units is appended per stage.
                loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
                loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
                loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
                loss_units[constants.KEY_MOBILEYE]['pred'] = rcnn_corners_preds
                multi_stage_loss_units.append([
                    loss_units[constants.KEY_CLASSES],
                    loss_units[constants.KEY_BOXES_2D],
                    loss_units[constants.KEY_MOBILEYE],
                    loss_units[constants.KEY_DIMS]
                ])
                multi_stage_stats.append(stats)
        # Everything below uses the values left over from the last loop
        # iteration (rcnn_corners_preds, keypoint_map, proposals, i).
        coder = bbox_coders.build({'type': constants.KEY_MOBILEYE})
        rcnn_corners_preds = coder.decode_batch(rcnn_corners_preds.detach(),
                                                proposals,
                                                keypoint_map.detach())
        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_KEYPOINTS_HEATMAP] = keypoint_map
        # if self.training:
        # corners_2d_gt = coder.decode_batch(
        # loss_units[constants.KEY_MOBILEYE]['target'], proposals)
        # prediction_dict['corners_2d_gt'] = corners_2d_gt
        if self.training:
            prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
            prediction_dict[constants.KEY_STATS] = multi_stage_stats
        else:
            prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs
            # `i` is the index of the last stage here.
            coder = bbox_coders.build(self.target_generators[i].
                                      target_generator_config['coder_config'])
            proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

            # Divide even-indexed box coordinates by image_info column 3 and
            # odd-indexed ones by column 2 (presumably the x/y resize scales
            # used to map back to the original image - TODO confirm).
            image_info = feed_dict[constants.KEY_IMAGE_INFO]
            proposals[:, :, ::
                      2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(
                          -1).unsqueeze(-1)
            proposals[:, :,
                      1::2] = proposals[:, :,
                                        1::2] / image_info[:, 2].unsqueeze(
                                            -1).unsqueeze(-1)

            # These slice assignments modify the tensor object already stored
            # under KEY_CORNERS_2D above, so the stored corners are rescaled
            # too.
            rcnn_corners_preds[:, :, :,
                               0] = rcnn_corners_preds[:, :, :,
                                                       0] / image_info[:, None,
                                                                       None, 3]
            rcnn_corners_preds[:, :, :,
                               1] = rcnn_corners_preds[:, :, :,
                                                       1] / image_info[:, None,
                                                                       None, 2]
            prediction_dict[constants.KEY_BOXES_2D] = proposals
            # prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds
            # prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds

        if self.training:
            loss_dict = self.loss(prediction_dict, feed_dict)
            return prediction_dict, loss_dict
        else:
            return prediction_dict
Ejemplo n.º 7
0
    def forward(self, feed_dict):
        """Run the multi-stage 2D detector (classification + box regression).

        First-stage features are computed from ``feed_dict[KEY_IMAGE]``,
        written back into ``feed_dict`` as ``'base_feat'`` (``feed_dict`` is
        mutated) and registered through ``self.add_feat``. The RPN proposals
        are then refined once per stage: pooled, classified, regressed and
        decoded into the proposals for the following stage.

        Args:
            feed_dict: mapping indexed by ``constants`` keys. ``KEY_IMAGE`` is
                always read; the label entries are read in training mode and
                ``KEY_IMAGE_INFO`` in inference mode.

        Returns:
            ``(prediction_dict, loss_dict)`` in training mode, otherwise the
            prediction dict alone with class probabilities and rescaled boxes.
        """
        outputs = {}

        # Shared first-stage features.
        image = feed_dict[constants.KEY_IMAGE]
        base_feat = self.feature_extractor.first_stage_feature(image)
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # Region proposals from the RPN.
        outputs.update(self.rpn_model.forward(feed_dict))
        proposals = outputs['proposals']

        stage_loss_units = []
        stage_stats = []
        for i in range(self.num_stages):

            if self.training:
                gt_boxes = feed_dict[constants.KEY_LABEL_BOXES_2D]
                gt_dict = {
                    constants.KEY_PRIMARY: gt_boxes,
                    constants.KEY_CLASSES: None,
                    constants.KEY_BOXES_2D: None,
                }

                # Extra inputs the target generator needs for encoding.
                auxiliary_dict = {
                    constants.KEY_BOXES_2D:
                    feed_dict[constants.KEY_LABEL_BOXES_2D],
                    constants.KEY_CLASSES:
                    feed_dict[constants.KEY_LABEL_CLASSES],
                    constants.KEY_NUM_INSTANCES:
                    feed_dict[constants.KEY_NUM_INSTANCES],
                    constants.KEY_PROPOSALS: proposals,
                }

                proposals_dict = {constants.KEY_PRIMARY: proposals}
                generated = self.target_generators[i].generate_targets(
                    proposals_dict, gt_dict, auxiliary_dict)
                proposals_dict, loss_units, stats = generated

                # Continue with the proposals returned by the generator.
                proposals = proposals_dict[constants.KEY_PRIMARY]

            rois = box_ops.box2rois(proposals)
            batch_size = rois.shape[0]

            # Pool, run the second-stage extractor, then average over the
            # two trailing dims to get one vector per ROI.
            roi_feat = self.rcnn_pooling(base_feat, rois.view(-1, 5))
            roi_feat = self.feature_extractor.second_stage_feature(roi_feat)
            roi_feat = roi_feat.mean(3).mean(2)

            rcnn_bbox_preds = self.rcnn_bbox_preds[i](roi_feat)
            flat_scores = self.rcnn_cls_preds[i](roi_feat)

            rcnn_cls_probs = F.softmax(flat_scores, dim=1).view(
                batch_size, -1, self.n_classes)
            rcnn_cls_scores = flat_scores.view(batch_size, -1, self.n_classes)

            if not self.class_agnostic:
                # Keep the regression of a single class per ROI: the target
                # class while training, the most probable class otherwise.
                if self.training:
                    class_ids = loss_units[constants.KEY_CLASSES]['target']
                else:
                    class_ids = rcnn_cls_probs.argmax(dim=-1)
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds, class_ids.view(-1))

            rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)

            if self.training:
                cls_unit = loss_units[constants.KEY_CLASSES]
                cls_unit['pred'] = rcnn_cls_scores
                box_unit = loss_units[constants.KEY_BOXES_2D]
                box_unit['pred'] = rcnn_bbox_preds
                # The units of all stages go into one flat list.
                stage_loss_units.append(cls_unit)
                stage_loss_units.append(box_unit)
                stage_stats.append(stats)

            # Decode the regression into the proposals of the next stage.
            coder_config = self.target_generators[i].target_generator_config[
                'coder_config']
            coder = bbox_coders.build(coder_config)
            proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

        if not self.training:
            outputs[constants.KEY_CLASSES] = rcnn_cls_probs

            # Even-indexed coordinates are divided by image_info column 3,
            # odd-indexed ones by column 2.
            image_info = feed_dict[constants.KEY_IMAGE_INFO]
            even_divisor = image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
            proposals[:, :, ::2] = proposals[:, :, ::2] / even_divisor
            odd_divisor = image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
            proposals[:, :, 1::2] = proposals[:, :, 1::2] / odd_divisor
            outputs[constants.KEY_BOXES_2D] = proposals
            return outputs

        outputs[constants.KEY_TARGETS] = stage_loss_units
        outputs[constants.KEY_STATS] = stage_stats
        loss_dict = self.loss(outputs, feed_dict)
        return outputs, loss_dict
Ejemplo n.º 8
0
    def forward(self, feed_dict):
        """Run the detector forward pass with box, corner, depth and dims heads.

        The first-stage extractor returns two sets of feature maps: one is
        written into ``feed_dict`` under ``'rpn_feat_maps'`` (``feed_dict`` is
        mutated) for the RPN, the other is used for ROI pooling. For each of
        ``self.num_stages`` stages the proposals are pooled and fed to the
        per-stage heads; corner and depth predictions are merged by
        ``self.fuse_corners_and_depth``. In inference mode the corners, boxes
        and dims are decoded inside the loop and rescaled with
        ``feed_dict[KEY_IMAGE_INFO]`` afterwards.

        Args:
            feed_dict: mapping indexed by ``constants`` keys. ``KEY_IMAGE`` and
                ``KEY_IMAGE_INFO`` are always read; the label entries are read
                in training mode, ``KEY_STEREO_CALIB_P2`` and
                ``KEY_MEAN_DIMS`` in both modes where used below.

        Returns:
            ``(prediction_dict, loss_dict)`` in training mode, where
            ``loss_dict`` is ``self.loss(prediction_dict, feed_dict)``;
            otherwise ``prediction_dict`` alone.
        """
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        prediction_dict = {}

        # base model
        rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
            feed_dict[constants.KEY_IMAGE])
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']
        multi_stage_loss_units = []
        multi_stage_stats = []
        for i in range(self.num_stages):

            if self.training:
                # proposals_dict
                proposals_dict = {}
                proposals_dict[constants.KEY_PRIMARY] = proposals

                # gt_dict
                gt_dict = {}
                gt_dict[constants.KEY_PRIMARY] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                gt_dict[constants.KEY_CLASSES] = None
                gt_dict[constants.KEY_BOXES_2D] = None
                gt_dict[constants.KEY_CORNERS_2D] = None
                gt_dict[constants.KEY_DIMS] = None

                # auxiliary_dict(used for encoding)
                auxiliary_dict = {}
                auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                    constants.KEY_STEREO_CALIB_P2]
                auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                    constants.KEY_LABEL_CLASSES]
                auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                    constants.KEY_LABEL_BOXES_3D]
                auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                    constants.KEY_NUM_INSTANCES]
                auxiliary_dict[constants.KEY_PROPOSALS] = proposals
                auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                    constants.KEY_MEAN_DIMS]
                auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[
                    constants.KEY_IMAGE_INFO]

                proposals_dict, loss_units, stats = self.target_generators[
                    i].generate_targets(proposals_dict, gt_dict,
                                        auxiliary_dict)

                # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
                proposals = proposals_dict[constants.KEY_PRIMARY]
            rois = box_ops.box2rois(proposals)
            # Only the first image's info entry is handed to the pooling
            # layer (presumably the image height/width - TODO confirm).
            pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                    rois.view(-1, 5),
                                                    im_info[0][:2])

            # Average over dims 3 and 2 to get one feature vector per ROI.
            pooled_feat_for_corners = self.feature_extractor.second_stage_feature(
                pooled_feat)
            pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

            rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
            rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
            rcnn_corners_preds = self.rcnn_corners_preds[i](
                pooled_feat_for_corners)

            rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            batch_size = rois.shape[0]
            rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                                   self.n_classes)
            rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                                 self.n_classes)

            rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
            rcnn_corners_preds = rcnn_corners_preds.view(
                batch_size, rcnn_bbox_preds.shape[1], -1)

            rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

            rcnn_depth_preds = self.rcnn_depth_preds[i](
                pooled_feat_for_corners)
            rcnn_depth_preds = rcnn_depth_preds.view(batch_size,
                                                     rcnn_bbox_preds.shape[1],
                                                     -1)

            rcnn_corners_preds = self.fuse_corners_and_depth(
                rcnn_corners_preds, rcnn_depth_preds)

            if self.training:
                # Attach this stage's predictions to the matching loss units;
                # one list of four units is appended per stage.
                loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
                loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
                loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
                loss_units[
                    constants.KEY_CORNERS_2D]['pred'] = rcnn_corners_preds
                multi_stage_loss_units.append([
                    loss_units[constants.KEY_CLASSES],
                    loss_units[constants.KEY_BOXES_2D],
                    loss_units[constants.KEY_CORNERS_2D],
                    loss_units[constants.KEY_DIMS]
                ])
                multi_stage_stats.append(stats)
            else:
                # The last channel of the fused prediction is kept as the
                # center depth before the corners are decoded.
                center_depth = rcnn_corners_preds[:, :, -1:]
                coder = bbox_coders.build(
                    {'type': constants.KEY_CORNERS_2D_NEAREST_DEPTH})
                rcnn_corners_preds = coder.decode_batch(
                    rcnn_corners_preds.detach(), proposals,
                    feed_dict[constants.KEY_STEREO_CALIB_P2])
                # Decode boxes into the proposals of the next stage.
                coder = bbox_coders.build(
                    self.target_generators[i].
                    target_generator_config['coder_config'])
                proposals = coder.decode_batch(rcnn_bbox_preds,
                                               proposals).detach()
                coder = bbox_coders.build({'type': constants.KEY_DIMS})
                rcnn_dim_preds = coder.decode_batch(
                    rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
                    rcnn_cls_probs).detach()

        if self.training:
            prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
            prediction_dict[constants.KEY_STATS] = multi_stage_stats
        else:
            prediction_dict[constants.KEY_CENTER_DEPTH] = center_depth
            prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

            # Even-indexed box coordinates are divided by image_info column
            # 3, odd-indexed ones by column 2 (presumably the x/y resize
            # scales - TODO confirm).
            image_info = feed_dict[constants.KEY_IMAGE_INFO]
            proposals[:, :, ::2] = (
                proposals[:, :, ::2] /
                image_info[:, 3].unsqueeze(-1).unsqueeze(-1))
            proposals[:, :, 1::2] = (
                proposals[:, :, 1::2] /
                image_info[:, 2].unsqueeze(-1).unsqueeze(-1))

            # Rescale corner coordinates 0 and 1 the same way. The divisor
            # is reshaped to (N, 1, ..., 1) with the same rank as the slice
            # being divided: the previous `image_info[:, None, None, 3:4]`
            # carried one dimension more than a 3-D slice, so the quotient
            # could only be assigned back when the batch size was 1.
            for coord_idx, info_col in ((0, 3), (1, 2)):
                coords = rcnn_corners_preds[:, :, :, coord_idx]
                divisor = image_info[:, info_col].reshape(
                    (-1, ) + (1, ) * (coords.dim() - 1))
                rcnn_corners_preds[:, :, :, coord_idx] = coords / divisor

            prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
            prediction_dict[constants.KEY_BOXES_2D] = proposals
            prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds

        if self.training:
            loss_dict = self.loss(prediction_dict, feed_dict)
            return prediction_dict, loss_dict
        else:
            return prediction_dict
Ejemplo n.º 9
0
    def forward(self, feed_dict):
        """Run the instance-based detector and return its decoded output.

        First-stage features are computed from ``feed_dict[KEY_IMAGE]`` and
        written back into ``feed_dict`` as ``'base_feat'`` (``feed_dict`` is
        mutated). The RPN output seeds ``auxiliary_dict``; every stage then
        pools the current boxes, evaluates each branch in ``self.branches``
        and lets ``self.instance_info`` turn the outputs into an instance.

        Args:
            feed_dict: mapping indexed by ``constants`` keys. ``KEY_IMAGE`` is
                always read, ``KEY_NUM_INSTANCES`` in training mode and
                ``KEY_IMAGE_INFO`` in inference mode.

        Returns:
            ``(instance, losses_dict, multi_stage_stats)``. In inference mode
            the last two are ``None`` and ``instance`` has been passed through
            ``self.instance_info.affine_transform``.
        """
        output_dict = {}
        losses_dict = UncoverDict()
        stats_per_stage = []

        # Shared first-stage features.
        image = feed_dict[constants.KEY_IMAGE]
        base_feat = self.feature_extractor.first_stage_feature(image)
        feed_dict.update({'base_feat': base_feat})

        # The RPN returns its instance dict together with its own losses.
        instance, losses = self.rpn_model.forward(feed_dict)
        auxiliary_dict = dict(instance)
        proposals = auxiliary_dict[constants.KEY_BOXES_2D]

        for i in range(self.num_stages):

            if self.training:
                losses_units = self.instance_info.generate_losses(
                    output_dict, feed_dict, auxiliary_dict)

                # Subsample the loss units, then apply the same mask to the
                # proposals.
                sampled = self.sampler.subsample_instance(losses_units)
                losses_units, subsampled_mask = sampled
                proposals, _ = self.sampler.subsample_instance(
                    proposals, subsampled_mask)

                # TODO subsample for all auxiliary_dict
                auxiliary_dict[constants.KEY_BOXES_2D] = proposals
                auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                    constants.KEY_NUM_INSTANCES]

                stage_stats = self.instance_info.generate_stats(auxiliary_dict)
                stats_per_stage.append(stage_stats)

            rois = box_ops.box2rois(proposals)
            batch_size = rois.shape[0]

            # Pool, run the second-stage extractor, then average over the
            # two trailing dims to get one vector per ROI.
            roi_feat = self.rcnn_pooling(base_feat, rois.view(-1, 5), 1 / 16)
            roi_feat = self.feature_extractor.second_stage_feature(roi_feat)
            roi_feat = roi_feat.mean(3).mean(2)

            # One prediction per named branch, using this stage's head.
            for attr_name in self.branches:
                output_dict[attr_name] = self.branches[attr_name][i](roi_feat)

            # Restore the batch dimension before computing losses.
            output_dict = self.instance_info.unsqueeze(output_dict, batch_size)
            if self.training:
                losses_units.update_from_output(output_dict)

            # Decode the raw outputs.
            instance = self.instance_info.generate_instance(
                output_dict, auxiliary_dict)

        if not self.training:
            # Inference: no losses or stats, only the rescaled instance.
            im_info = feed_dict[constants.KEY_IMAGE_INFO]
            instance = self.instance_info.affine_transform(instance, im_info)
            return instance, None, None

        # Training: RPN losses first, then the losses of the last stage.
        losses_dict.update(losses)
        losses_dict.update(self.instance_info.calc_loss(losses_units))
        return instance, losses_dict, stats_per_stage