def forward(self, im_data, im_info, gt_boxes):
        """Run the multi-scale detector on a batch of images.

        The outputs of three consecutive backbone stages are fused into one
        feature map: the first is passed through ``self.downSample``, the last
        is upsampled 2x with nearest-neighbour interpolation, and all three are
        concatenated along dim 1 and passed through ``self.downBeat``. The
        fused map feeds both the RPN and the RoI head.

        Args:
            im_data: image batch; ``im_data.size(0)`` is taken as batch size.
            im_info: per-image metadata, forwarded to the RPN.
            gt_boxes: ground-truth boxes, forwarded to the RPN and, in training
                mode, to ``self.RCNN_proposal_target``.

        Returns:
            In training mode the 8-tuple ``(rois, cls_prob, bbox_pred,
            rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox,
            rois_label)``; otherwise ``(rois, cls_prob, bbox_pred)``.

        Raises:
            ValueError: if ``cfg.POOLING_MODE`` is neither 'align' nor 'pool'.
        """
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data

        # feed image data through the three backbone stages
        base_feat1 = self.RCNN_base1(im_data)  # 1/8
        base_feat2 = self.RCNN_base2(base_feat1)  # 1/16
        base_feat3 = self.RCNN_base3(base_feat2)  # 1/32

        # torch.cat along dim 1 needs equal sizes on the other dims, so the
        # outer stages are resampled before being fused with the middle one
        down_sampled = self.downSample(base_feat1)
        up_sampled = F.interpolate(base_feat3, scale_factor=2, mode='nearest')
        base_feat = torch.cat((down_sampled, base_feat2, up_sampled), 1)
        base_feat = self.downBeat(base_feat)

        # feed the fused feature map to the RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes)

        # In training mode the proposals are sampled/labelled against the
        # ground truth; the label and regression tensors produced here are
        # required by the loss computation below.
        # NOTE(review): assumes self.RCNN_proposal_target is created by the
        # constructor of this class -- confirm.
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            # flatten the per-image tensors to one row per RoI
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # do roi pooling based on predicted rois (5 columns per RoI row)
        if cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
        else:
            raise ValueError(
                "unsupported cfg.POOLING_MODE: {!r}".format(cfg.POOLING_MODE))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the 4 regression columns that belong to each RoI's label
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), bbox_pred.size(1) // 4, 4)
            gather_index = rois_label.view(rois_label.size(0), 1, 1).expand(
                rois_label.size(0), 1, 4)
            bbox_pred = torch.gather(bbox_pred_view, 1, gather_index).squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        # restore the (batch, rois-per-image, ...) layout for the caller
        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        if self.training:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
        else:
            return rois, cls_prob, bbox_pred
    def forward(self, im_data, im_info, gt_boxes):
        """Run backbone, RPN and RoI head on a batch of images.

        Args:
            im_data: image batch; its first dimension is the batch size.
            im_info: per-image metadata handed to the RPN.
            gt_boxes: ground-truth boxes, used by the RPN and (when training)
                by ``self.RCNN_proposal_target``.

        Returns:
            ``(rois, cls_prob, bbox_pred)`` in eval mode. In training mode the
            tuple is extended with ``rpn_loss_cls, rpn_loss_bbox,
            RCNN_loss_cls, RCNN_loss_bbox, rois_label``.
        """
        cfg = self.cfg
        n_images = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data

        # backbone features first, then region proposals from the RPN
        features = self.RCNN_base(im_data)
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            features, im_info, gt_boxes)

        if self.training:
            # sample the proposals against the ground truth and flatten the
            # per-image outputs into one row per RoI
            def _flatten_rows(t):
                return Variable(t.view(-1, t.size(2)))

            (rois, labels, targets,
             inside_ws, outside_ws) = self.RCNN_proposal_target(rois, gt_boxes)
            rois_label = Variable(labels.view(-1).long())
            rois_target = _flatten_rows(targets)
            rois_inside_ws = _flatten_rows(inside_ws)
            rois_outside_ws = _flatten_rows(outside_ws)
        else:
            rois_label = rois_target = None
            rois_inside_ws = rois_outside_ws = None
            rpn_loss_cls = rpn_loss_bbox = 0

        rois = Variable(rois)

        # the pooling operator is chosen by the config; RoI rows have 5 columns
        pooling_mode = cfg.POOLING_MODE
        if pooling_mode == 'align':
            roi_feat = self.RCNN_roi_align(features, rois.view(-1, 5))
        elif pooling_mode == 'pool':
            roi_feat = self.RCNN_roi_pool(features, rois.view(-1, 5))

        # shared head, followed by the box-regression and classification layers
        roi_feat = self._head_to_tail(roi_feat)
        box_deltas = self.RCNN_bbox_pred(roi_feat)
        class_logits = self.RCNN_cls_score(roi_feat)
        class_probs = F.softmax(class_logits, 1)

        if self.training:
            # cross-entropy on the class logits, smooth L1 on the box deltas
            loss_cls = F.cross_entropy(class_logits, rois_label)
            loss_bbox = _smooth_l1_loss(box_deltas, rois_target,
                                        rois_inside_ws, rois_outside_ws)
        else:
            loss_cls = 0
            loss_bbox = 0

        # back to a (batch, rois-per-image, ...) layout
        rois_per_image = rois.size(1)
        class_probs = class_probs.view(n_images, rois_per_image, -1)
        box_deltas = box_deltas.view(n_images, rois_per_image, -1)

        detections = (rois, class_probs, box_deltas)
        if not self.training:
            return detections
        return detections + (rpn_loss_cls, rpn_loss_bbox,
                             loss_cls, loss_bbox, rois_label)
    def forward(self, im_data, im_info, gt_boxes):
        """Run the detector, with optional OHEM and IoU-based box losses.

        Pipeline: ``self.RCNN_base`` -> ``self.RCNN_rpn`` -> (training only)
        ``self.RCNN_proposal_target`` -> ``self.RCNN_roi_pool`` ->
        ``self._head_to_tail`` -> box-regression and classification layers.

        Config switches read from ``self.cfg.TRAIN``:
            is_ohem_rcnn: when true, the classification loss is the mean of
                only the largest per-RoI cross-entropy values.
            loss_type: ``"smoothL1loss"``, ``"IOUloss"`` or ``"GIOUloss"``
                selects the box-regression loss. Any other value leaves the
                box loss at 0.

        Args:
            im_data: image batch; ``im_data.size(0)`` is the batch size.
            im_info: per-image metadata forwarded to the RPN.
            gt_boxes: ground-truth boxes for the RPN and proposal sampling.

        Returns:
            In training mode ``(rois, cls_prob, bbox_pred, rpn_loss_cls,
            rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox)``; otherwise
            ``(rois, cls_prob, bbox_pred)``.
        """
        cfg = self.cfg

        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes)

        # in the training phase, sample rois against the ground-truth boxes
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            # flatten the per-image tensors to one row per RoI
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # do roi pooling based on predicted rois (5 columns per RoI row)
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            if cfg.TRAIN.is_ohem_rcnn:
                per_roi_loss = F.cross_entropy(cls_score,
                                               rois_label,
                                               reduction='none')

                # keep an eighth of cfg.TRAIN.BATCH_SIZE per image; clamp so
                # that topk never gets k == 0 (empty mean -> NaN) or more
                # entries than there are losses
                top_k = int(0.125 * cfg.TRAIN.BATCH_SIZE * base_feat.size(0))
                top_k = min(max(top_k, 1), per_roi_loss.numel())
                hardest_losses, _ = per_roi_loss.topk(top_k)
                RCNN_loss_cls = hardest_losses.mean()
            else:
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression loss; an unrecognised loss_type leaves
            # RCNN_loss_bbox at 0
            if cfg.TRAIN.loss_type == "smoothL1loss":
                # the box loss is taken over all sampled rois, with or
                # without OHEM
                RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                 rois_inside_ws,
                                                 rois_outside_ws)
            elif "IOUloss" in cfg.TRAIN.loss_type:
                # Compare the predictions with the targets; passing the
                # targets for both arguments would make the loss independent
                # of bbox_pred.
                # NOTE(review): assumes compute_iou takes
                # (pred, target, inside_ws, outside_ws) like _smooth_l1_loss
                # -- confirm against its definition.
                iou, g_iou = compute_iou(bbox_pred, rois_target,
                                         rois_inside_ws, rois_outside_ws)

                if cfg.TRAIN.loss_type == "GIOUloss":
                    RCNN_loss_bbox = 1 - g_iou
                elif cfg.TRAIN.loss_type == "IOUloss":
                    RCNN_loss_bbox = -iou.log()

        # restore the (batch, rois-per-image, ...) layout for the caller
        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        if self.training:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox
        else:
            return rois, cls_prob, bbox_pred
    def forward(self, base_feat, im_info, gt_boxes):
        """Region proposal network pass with optional OHEM / IoU losses.

        Computes proposals from ``base_feat`` and, in training mode, the RPN
        classification and box-regression losses. Both losses are also stored
        on ``self.rpn_loss_cls`` / ``self.rpn_loss_box``.

        Config switches read from ``self.cfg.TRAIN``:
            is_ohem_rpn: when true, losses are averaged over only the largest
                per-element values (online hard example mining).
            loss_type: ``"smoothL1loss"``, ``"IOUloss"`` or ``"GIOUloss"``
                selects the box-regression loss. Any other value leaves the
                box loss at 0.

        Args:
            base_feat: backbone feature map; ``base_feat.size(0)`` is the
                batch size.
            im_info: per-image metadata forwarded to the proposal and
                anchor-target layers.
            gt_boxes: ground-truth boxes; must not be None in training mode.

        Returns:
            ``(rois, rpn_loss_cls, rpn_loss_box)``; both losses are 0 in eval
            mode.
        """
        cfg = self.cfg

        batch_size = base_feat.size(0)

        # return feature map after conv + relu layer
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
        # get rpn classification score
        RPN_cls_score = self.RPN_cls_score(rpn_conv1)

        # softmax over dim 1 of the reshaped scores, then reshape back
        rpn_cls_score_reshape = self.reshape(RPN_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # get rpn offsets to the anchor boxes
        RPN_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

        # proposal layer; its second output is not needed here
        cfg_key = 'TRAIN' if self.training else 'TEST'
        rois, _ = self.RPN_proposal((rpn_cls_prob.data, RPN_bbox_pred.data,
                                     im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target(
                (RPN_cls_score.data, gt_boxes, im_info))

            # compute classification loss on anchors whose label is not -1
            RPN_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            RPN_cls_score = torch.index_select(
                RPN_cls_score.view(-1, 2), 0, rpn_keep)
            rpn_label = torch.index_select(
                rpn_label.view(-1), 0, rpn_keep.data)
            rpn_label = Variable(rpn_label.long())

            if cfg.TRAIN.is_ohem_rpn:
                per_anchor_loss = F.cross_entropy(
                    RPN_cls_score, rpn_label, reduction='none')

                # keep an eighth of cfg.TRAIN.RPN_BATCHSIZE per image; clamp
                # so that topk never gets k == 0 (empty mean -> NaN) or more
                # entries than there are kept anchors
                top_k = int(0.125 * cfg.TRAIN.RPN_BATCHSIZE *
                            base_feat.size(0))
                top_k = min(max(top_k, 1), per_anchor_loss.numel())
                hardest_losses, _ = per_anchor_loss.topk(top_k)
                self.rpn_loss_cls = hardest_losses.mean()
            else:
                self.rpn_loss_cls = F.cross_entropy(RPN_cls_score, rpn_label)

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            # an unrecognised loss_type leaves self.rpn_loss_box at 0
            if cfg.TRAIN.loss_type == "smoothL1loss":
                if cfg.TRAIN.is_ohem_rpn:
                    per_elem_loss = _smooth_l1_loss_by_zcc(
                        RPN_bbox_pred, rpn_bbox_targets,
                        rpn_bbox_inside_weights, rpn_bbox_outside_weights,
                        sigma=3, dim=[1, 2, 3])
                    per_elem_loss = per_elem_loss.view(-1)

                    # k is an eighth of the inside-weight sum, rounded; the
                    # clamp covers the case where that sum is 0
                    top_k = int(0.125 * rpn_bbox_inside_weights.sum() + 0.5)
                    top_k = min(max(top_k, 1), per_elem_loss.numel())
                    hardest_losses, _ = per_elem_loss.topk(top_k)
                    self.rpn_loss_box = hardest_losses.mean()
                else:
                    self.rpn_loss_box = _smooth_l1_loss(
                        RPN_bbox_pred, rpn_bbox_targets,
                        rpn_bbox_inside_weights, rpn_bbox_outside_weights,
                        sigma=3, dim=[1, 2, 3])

            elif "IOUloss" in cfg.TRAIN.loss_type:
                # Compare the predictions with the targets; passing the
                # targets for both arguments would make the loss independent
                # of RPN_bbox_pred.
                # NOTE(review): assumes compute_iou takes
                # (pred, target, inside_ws, outside_ws) like _smooth_l1_loss
                # -- confirm against its definition.
                iou, g_iou = compute_iou(
                    RPN_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
                    rpn_bbox_outside_weights)
                if cfg.TRAIN.loss_type == "GIOUloss":
                    self.rpn_loss_box = 1 - g_iou
                elif cfg.TRAIN.loss_type == "IOUloss":
                    self.rpn_loss_box = -iou.log()

        return rois, self.rpn_loss_cls, self.rpn_loss_box
# Example #5
# 0
    def forward(self, base_feat, im_info, gt_boxes):
        """Region proposal network pass.

        Produces proposals from ``base_feat`` and, in training mode, the RPN
        classification (cross-entropy) and box-regression (smooth L1) losses.
        The losses are stored on ``self.rpn_loss_cls`` / ``self.rpn_loss_box``
        as well as returned.

        Args:
            base_feat: backbone feature map; its first dimension is the batch
                size.
            im_info: per-image metadata passed to the proposal and
                anchor-target layers.
            gt_boxes: ground-truth boxes; asserted non-None when training.

        Returns:
            ``(rois, rpn_loss_cls, rpn_loss_box)``; both losses are 0 in eval
            mode.
        """
        n_images = base_feat.size(0)

        # shared conv + relu, then the two sibling prediction layers
        conv_feat = F.relu(self.RPN_Conv(base_feat), inplace=True)
        cls_logits = self.RPN_cls_score(conv_feat)

        # softmax over dim 1 of the reshaped scores, then reshape back
        logits_2way = self.reshape(cls_logits, 2)
        probs_2way = F.softmax(logits_2way, 1)
        cls_probs = self.reshape(probs_2way, self.nc_score_out)

        box_deltas = self.RPN_bbox_pred(conv_feat)

        # the proposal layer gets detached data plus the config section key
        if self.training:
            cfg_key = 'TRAIN'
        else:
            cfg_key = 'TEST'
        proposal_inputs = (cls_probs.data, box_deltas.data, im_info, cfg_key)
        rois = self.RPN_proposal(proposal_inputs)

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # nothing more to do at inference time
        if not self.training:
            return rois, self.rpn_loss_cls, self.rpn_loss_box

        assert gt_boxes is not None

        anchor_targets = self.RPN_anchor_target(
            (cls_logits.data, gt_boxes, im_info))

        # classification loss over anchors whose label is not -1
        flat_logits = logits_2way.permute(0, 2, 3, 1).contiguous()
        flat_logits = flat_logits.view(n_images, -1, 2)
        labels = anchor_targets[0].view(n_images, -1)

        keep = Variable(labels.view(-1).ne(-1).nonzero().view(-1))
        kept_logits = torch.index_select(flat_logits.view(-1, 2), 0, keep)
        kept_labels = torch.index_select(labels.view(-1), 0, keep.data)
        kept_labels = Variable(kept_labels.long())
        self.rpn_loss_cls = F.cross_entropy(kept_logits, kept_labels)
        # count of kept anchors with a non-zero label; not used further in
        # this method
        fg_cnt = torch.sum(kept_labels.data.ne(0))

        # box-regression loss
        box_targets, inside_ws, outside_ws = anchor_targets[1:]
        inside_ws = Variable(inside_ws)
        outside_ws = Variable(outside_ws)
        box_targets = Variable(box_targets)

        self.rpn_loss_box = _smooth_l1_loss(
            box_deltas, box_targets, inside_ws, outside_ws,
            sigma=3, dim=[1, 2, 3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
# Example #6
# 0
    def forward(self, im_data, im_info, gt_boxes):
        """Detector forward pass using RoI pooling.

        Steps: ``self.RCNN_base`` -> ``self.RCNN_rpn`` -> (training only)
        ``self.RCNN_proposal_target`` -> ``self.RCNN_roi_pool`` ->
        ``self._head_to_tail`` -> box-regression and classification layers.

        Args:
            im_data: image batch; ``im_data.size(0)`` is the batch size.
            im_info: per-image metadata forwarded to the RPN.
            gt_boxes: ground-truth boxes for the RPN and proposal sampling.

        Returns:
            Training mode: ``(rois, cls_prob, bbox_pred, rpn_loss_cls,
            rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox)``.
            Eval mode: ``(rois, cls_prob, bbox_pred)``.
        """
        num_images = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data

        # feature extraction followed by region proposals
        feat_map = self.RCNN_base(im_data)
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            feat_map, im_info, gt_boxes)

        if not self.training:
            # no targets and no RPN losses at inference time
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
        else:
            # sample proposals against the ground truth, then flatten every
            # output to one row per RoI
            sampled = self.RCNN_proposal_target(rois, gt_boxes)
            rois, raw_label, raw_target, raw_inside, raw_outside = sampled

            rois_label = Variable(raw_label.view(-1).long())
            rois_target, rois_inside_ws, rois_outside_ws = (
                Variable(t.view(-1, t.size(2)))
                for t in (raw_target, raw_inside, raw_outside)
            )

        rois = Variable(rois)

        # pool one feature per RoI (5 columns per row), then run the head
        pooled = self.RCNN_roi_pool(feat_map, rois.view(-1, 5))
        pooled = self._head_to_tail(pooled)

        # box offsets and class probabilities
        bbox_pred = self.RCNN_bbox_pred(pooled)
        scores = self.RCNN_cls_score(pooled)
        cls_prob = F.softmax(scores, 1)

        # losses stay at 0 unless the module is in training mode
        RCNN_loss_cls, RCNN_loss_bbox = 0, 0
        if self.training:
            RCNN_loss_cls = F.cross_entropy(scores, rois_label)
            RCNN_loss_bbox = _smooth_l1_loss(
                bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        # reshape to (batch, rois-per-image, ...) for the caller
        per_image = rois.size(1)
        cls_prob = cls_prob.view(num_images, per_image, -1)
        bbox_pred = bbox_pred.view(num_images, per_image, -1)

        if self.training:
            return (rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
                    RCNN_loss_cls, RCNN_loss_bbox)
        return rois, cls_prob, bbox_pred