Example 1
    def _PyramidRoI_Feat(self, feat_maps, rois, im_info):
        ''' roi pool on pyramid feature maps'''
        # do roi pooling based on predicted rois
        img_area = im_info[0][0] * im_info[0][1]
        h = rois.data[:, 4] - rois.data[:, 2] + 1
        w = rois.data[:, 3] - rois.data[:, 1] + 1
        roi_level = torch.log(torch.sqrt(h * w) / 224.0) / np.log(2)
        roi_level = torch.floor(roi_level + 4)
        # --------
        # roi_level = torch.log(torch.sqrt(h * w) / 224.0)
        # roi_level = torch.round(roi_level + 4)
        # ------
        roi_level[roi_level < 2] = 2
        roi_level[roi_level > 5] = 5
        # roi_level.fill_(5)
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            # NOTE: need to add pyramid
            grid_xy = _affine_grid_gen(rois, feat_maps.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            roi_pool_feat = self.RCNN_roi_crop(feat_maps, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                roi_pool_feat = F.max_pool2d(roi_pool_feat, 2, 2)

        elif cfg.POOLING_MODE == 'align':
            roi_pool_feats = []
            box_to_levels = []
            for i, l in enumerate(range(2, 6)):
                if (roi_level == l).sum() == 0:
                    continue
                idx_l = (roi_level == l).nonzero().squeeze()
                box_to_levels.append(idx_l)
                scale = feat_maps[i].size(2) / im_info[0][0]
                feat = self.RCNN_roi_align(feat_maps[i], rois[idx_l], scale)
                roi_pool_feats.append(feat)
            roi_pool_feat = torch.cat(roi_pool_feats, 0)
            box_to_level = torch.cat(box_to_levels, 0)
            idx_sorted, order = torch.sort(box_to_level)
            roi_pool_feat = roi_pool_feat[order]

        elif cfg.POOLING_MODE == 'pool':
            roi_pool_feats = []
            box_to_levels = []
            for i, l in enumerate(range(2, 6)):
                if (roi_level == l).sum() == 0:
                    continue
                idx_l = (roi_level == l).nonzero().squeeze()
                box_to_levels.append(idx_l)
                scale = feat_maps[i].size(2) / im_info[0][0]
                feat = self.RCNN_roi_pool(feat_maps[i], rois[idx_l], scale)
                roi_pool_feats.append(feat)
            roi_pool_feat = torch.cat(roi_pool_feats, 0)
            box_to_level = torch.cat(box_to_levels, 0)
            idx_sorted, order = torch.sort(box_to_level)
            roi_pool_feat = roi_pool_feat[order]
            
        return roi_pool_feat
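
For reference, the level assignment above is the FPN heuristic k = floor(4 + log2(sqrt(w*h) / 224)), clamped to levels 2..5. A minimal standalone sketch of the rule (the box coordinates are made up for illustration):

import numpy as np
import torch

# rois rows: [batch_idx, x1, y1, x2, y2], matching the column indexing above
rois = torch.tensor([
    [0.,  10.,  10., 110., 110.],   # ~100x100 box -> level 2
    [0.,  50.,  50., 274., 274.],   # ~224x224 box -> level 4
    [0.,   0.,   0., 800., 600.],   # large box -> clamped to level 5
])
h = rois[:, 4] - rois[:, 2] + 1
w = rois[:, 3] - rois[:, 1] + 1
roi_level = torch.floor(torch.log(torch.sqrt(h * w) / 224.0) / np.log(2) + 4)
roi_level = roi_level.clamp(min=2, max=5)  # same effect as the two index assignments above
print(roi_level)  # tensor([2., 4., 5.])
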
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_label = Variable(rois_label.view(-1).long())

            # TODO
            rois_main_label = Variable(rois_label.view(-1).long())
            rois_sub_class = list(
                map(lambda x: self.sub_classes[x], rois_main_label))
            rois_main_class = list(
                map(lambda x: sub2main_dict[x], rois_sub_class))
            rois_main_label = list(
                map(lambda x: self.main_classes.index(x), rois_main_class))
            rois_main_label = torch.cuda.LongTensor(rois_main_label)
            rois_main_label = Variable(rois_main_label)

            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_main_label = None
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        # return roi_data
        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        elif cfg.POOLING_MODE == 'pspool':
            # NOTE: currently falls back to plain RoI pooling
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat_main = self._head_to_tail_main(pooled_feat)
        pooled_feat_sub = self._head_to_tail_sub(pooled_feat)

        # compute bbox offset
        bbox_pred_main = self.RCNN_bbox_pred_main(pooled_feat_main)
        bbox_pred_sub = self.RCNN_bbox_pred_sub(pooled_feat_sub)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view_main = bbox_pred_main.view(
                bbox_pred_main.size(0), int(bbox_pred_main.size(1) / 4), 4)
            bbox_pred_select_main = torch.gather(
                bbox_pred_view_main, 1,
                rois_main_label.view(rois_main_label.size(0), 1,
                                     1).expand(rois_main_label.size(0), 1, 4))
            bbox_pred_main = bbox_pred_select_main.squeeze(1)

            bbox_pred_view_sub = bbox_pred_sub.view(
                bbox_pred_sub.size(0), int(bbox_pred_sub.size(1) / 4), 4)
            bbox_pred_select_sub = torch.gather(
                bbox_pred_view_sub, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred_sub = bbox_pred_select_sub.squeeze(1)

        # compute object classification probability
        cls_score_main = self.RCNN_cls_score_main(pooled_feat_main)
        cls_prob_main = F.softmax(cls_score_main, 1)
        cls_score_sub = self.RCNN_cls_score_sub(pooled_feat_sub)
        cls_prob_sub = F.softmax(cls_score_sub, 1)

        RCNN_loss_cls_main = 0
        RCNN_loss_bbox_main = 0

        RCNN_loss_cls_sub = 0
        RCNN_loss_bbox_sub = 0

        if self.training:
            # classification loss
            RCNN_loss_cls_main = F.cross_entropy(cls_score_main,
                                                 rois_main_label)

            # TODO: rois_label should ...
            RCNN_loss_cls_sub = F.cross_entropy(cls_score_sub, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox_main = _smooth_l1_loss(bbox_pred_main, rois_target,
                                                  rois_inside_ws,
                                                  rois_outside_ws)
            RCNN_loss_bbox_sub = _smooth_l1_loss(bbox_pred_sub, rois_target,
                                                 rois_inside_ws,
                                                 rois_outside_ws)

        cls_prob_main = cls_prob_main.view(batch_size, rois.size(1), -1)
        bbox_pred_main = bbox_pred_main.view(batch_size, rois.size(1), -1)
        cls_prob_sub = cls_prob_sub.view(batch_size, rois.size(1), -1)
        bbox_pred_sub = bbox_pred_sub.view(batch_size, rois.size(1), -1)

        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls_main = torch.unsqueeze(RCNN_loss_cls_main, 0)
            RCNN_loss_bbox_main = torch.unsqueeze(RCNN_loss_bbox_main, 0)
            RCNN_loss_cls_sub = torch.unsqueeze(RCNN_loss_cls_sub, 0)
            RCNN_loss_bbox_sub = torch.unsqueeze(RCNN_loss_bbox_sub, 0)

        return rois, cls_prob_main, bbox_pred_main, cls_prob_sub, bbox_pred_sub, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls_sub, RCNN_loss_bbox_sub, RCNN_loss_cls_main, RCNN_loss_bbox_main, rois_label
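
The training branch above remaps each sub-class label to its main-class label through self.sub_classes, sub2main_dict, and self.main_classes. A toy illustration of that mapping chain, with hypothetical class lists standing in for the real ones:

# hypothetical stand-ins for self.sub_classes / sub2main_dict / self.main_classes
sub_classes = ['__background__', 'sedan', 'truck', 'husky', 'poodle']
main_classes = ['__background__', 'vehicle', 'dog']
sub2main_dict = {'__background__': '__background__',
                 'sedan': 'vehicle', 'truck': 'vehicle',
                 'husky': 'dog', 'poodle': 'dog'}

rois_label = [1, 3, 4, 2]                                     # sub-class indices
rois_sub_class = [sub_classes[x] for x in rois_label]         # index -> sub name
rois_main_class = [sub2main_dict[x] for x in rois_sub_class]  # sub -> main name
rois_main_label = [main_classes.index(x) for x in rois_main_class]
print(rois_main_label)  # [1, 2, 2, 1]
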
Example 4
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                use_gt_boxes=False):
        batch_size = im_data.size(0)
        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base_model(im_data)

        if not use_gt_boxes:
            # feed base feature map to RPN to obtain rois
            rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
                base_feat, im_info.data, gt_boxes.data, num_boxes.data)
        else:
            # otherwise use the ground-truth boxes as the output of RCNN_rpn
            rois = gt_boxes.data.clone()
            rois[0, :, 0] = 0
            rois[0, :, 1:] = gt_boxes.data[0, :, :4]
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        if not self.training:
            if batch_size == 1:
                valid = rois.sum(2).view(-1).nonzero().view(-1)
                rois = rois[:, valid, :]

        rpn_loss = rpn_loss_cls + rpn_loss_bbox

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes.data)
            # rois, rois_obj_label, rois_att_label, \
            # rois_target, rois_inside_ws, rois_outside_ws = roi_data
            # rois_obj_label = Variable(rois_obj_label.view(-1))
            # rois_att_label = Variable(rois_att_label.view(-1, self.n_att_classes))
            # rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            # rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            # rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))

            roi_data_msdn = self.RCNN_proposal_target_msdn(rois, gt_boxes.data)

            rois, roi_rel_pairs, roi_pair_proposals, rois_obj_label, rois_att_label, rois_rel_label, \
            rois_target, rois_inside_ws, rois_outside_ws = roi_data_msdn
            rois_obj_label = Variable(rois_obj_label.view(-1))
            rois_att_label = Variable(
                rois_att_label.view(-1, self.n_att_classes))
            rois_rel_label = Variable(rois_rel_label.view(-1))
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))

            roi_pair_proposals = roi_pair_proposals.long()
            roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
            ind_subject = roi_pair_proposals_v[:, 0]
            ind_object = roi_pair_proposals_v[:, 1]
        else:

            rois_obj_label = None
            rois_att_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

            rois_out = []
            roi_rel_pairs_out = []
            roi_pair_proposals_out = []
            for i in range(rois.size(0)):
                rois, roi_rel_pairs, roi_pair_proposals = self._setup_connection(
                    rois[i])
                rois_out.append(rois)
                roi_rel_pairs_out.append(roi_rel_pairs)
                roi_pair_proposals_out.append(roi_pair_proposals)

            rois = torch.stack(rois_out, 0)
            roi_rel_pairs = torch.stack(roi_rel_pairs_out, 0)
            roi_pair_proposals = torch.stack(roi_pair_proposals_out, 0)

            roi_pair_proposals = roi_pair_proposals.long()
            roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
            ind_subject = roi_pair_proposals_v[:, 0]
            ind_object = roi_pair_proposals_v[:, 1]

        rois = Variable(rois)

        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        x_obj = self._head_to_tail(pooled_feat)  # (B x N) x D

        # compute object classification probability
        obj_cls_score = self.RCNN_obj_cls_score(x_obj)
        obj_cls_prob = F.softmax(obj_cls_score, dim=1)
        bbox_pred = self.RCNN_bbox_pred(x_obj)

        if cfg.HAS_ATTRIBUTES:
            x_att = self._head_to_tail_att(pooled_feat)  # (B x N) x D
            att_cls_score = self.RCNN_att_cls_score(x_att)
            att_cls_prob = F.softmax(att_cls_score, dim=1)
            att_cls_log_prob = F.log_softmax(att_cls_score, dim=1)

        if cfg.HAS_RELATIONS:

            # feed base feature map to RPN to obtain rois
            # x_view = x.view(rois.size(0), rois.size(1), x.size(1))
            # rel_feats = obj_cls_score.view(rois.size(0), rois.size(1), obj_cls_score.size(1))
            # roi_rel_pairs, roi_pair_proposals, roi_rel_pairs_score, relpn_loss_cls = \
            #     self.RELPN_rpn(rois.data, rel_feats, im_info.data, gt_boxes.data, num_boxes.data, use_gt_boxes)

            # relpn_loss = relpn_loss_cls

            # size_per_batch = x_obj.size(0) / batch_size

            # roi_pair_proposals = roi_pair_proposals + torch.arange(0, batch_size).view(batch_size, 1, 1).type_as(roi_pair_proposals)\
            #     * size_per_batch

            # roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
            # ind_subject = roi_pair_proposals_v[:, 0]
            # ind_object = roi_pair_proposals_v[:, 1]

            # if self.training:

            #     roi_pair_data = self.RELPN_proposal_target(roi_rel_pairs, gt_boxes.data, num_boxes.data)

            #     # pdb.set_trace()

            #     roi_rel_pairs, rois_rel_label, roi_pair_keep = roi_pair_data
            #     rois_rel_label = Variable(rois_rel_label.view(-1))

            #     roi_pair_keep = roi_pair_keep + torch.arange(0, roi_pair_keep.size(0)).view(roi_pair_keep.size(0), 1).cuda() \
            #                                     * roi_pair_proposals_v.size(0) / batch_size
            #     roi_pair_keep = roi_pair_keep.view(-1).long()

            #     ind_subject = roi_pair_proposals_v[roi_pair_keep][:, 0]
            #     ind_object = roi_pair_proposals_v[roi_pair_keep][:, 1]

            rois_pred = combine_box_pairs(roi_rel_pairs.view(-1, 9))
            rois_pred = Variable(rois_pred)

            # # do roi pooling based on predicted rois
            if cfg.POOLING_MODE == 'crop':
                grid_xy = _affine_grid_gen(rois_pred.view(-1, 5),
                                           base_feat.size()[2:],
                                           self.grid_size)
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                    3).contiguous()
                pooled_pred_feat = self.RELPN_roi_crop(
                    base_feat,
                    Variable(grid_yx).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    pooled_pred_feat = F.max_pool2d(pooled_feat, 2, 2)
            elif cfg.POOLING_MODE == 'align':
                pooled_pred_feat = self.RELPN_roi_align(
                    base_feat, rois_pred.view(-1, 5))
            elif cfg.POOLING_MODE == 'pool':
                pooled_pred_feat = self.RELPN_roi_pool(base_feat,
                                                       rois_pred.view(-1, 5))

            # # combine subject, object and relation feature tohether
            x_pred = self._head_to_tail_rel(pooled_pred_feat)

            x_rel = x_pred  #torch.cat((x_sobj, x_pred, x_oobj), 1)

            # compute object classification probability
            rel_cls_score = self.RCNN_rel_cls_score(x_rel)
            rel_cls_prob = F.softmax(rel_cls_score, dim=1)

        if cfg.GCN_ON_FEATS and cfg.GCN_LAYERS > 0:

            if cfg.GCN_HAS_ATTENTION:
                x_sobj = obj_cls_score[ind_subject]
                x_oobj = obj_cls_score[ind_object]
                attend_score = self.GRCNN_gcn_att1(x_sobj, x_oobj)  # N_rel x 1
                attend_score = attend_score.view(1, x_pred.size(0))
            else:
                attend_score = Variable(
                    x_rel.data.new(1, x_pred.size(0)).fill_(1))

            # compute the initial maps, including map_obj_att, map_obj_obj and map_obj_rel
            # NOTE we have two ways to compute the map among objects; one is based on the overlaps among object rois.
            # NOTE the intuition behind this is that rois with overlaps should share some common features, so we need to
            # NOTE exclude one roi feature from another.
            # NOTE the other way is based on the classification scores. The intuition is that objects have common
            # co-occurrences, e.g. a bus appears more frequently on the road.
            assert x_obj.size() == x_att.size(), \
                "the numbers of object features and attribute features should be the same"

            size_per_batch = x_obj.size(0) // batch_size

            map_obj_att = torch.eye(x_obj.size(0)).type_as(x_obj.data)

            if cfg.MUTE_ATTRIBUTES:
                map_obj_att.zero_()
                x_att = x_att.detach()

            map_obj_att = Variable(map_obj_att)

            map_obj_obj = x_obj.data.new(x_obj.size(0),
                                         x_obj.size(0)).fill_(0.0)
            eye_mat = torch.eye(size_per_batch).type_as(x_obj.data)
            for i in range(batch_size):
                map_obj_obj[i * size_per_batch:(i + 1) * size_per_batch,
                            i * size_per_batch:(i + 1) * size_per_batch] = 1.0 - eye_mat

            map_obj_obj = Variable(map_obj_obj)

            map_sobj_rel = Variable(
                x_obj.data.new(x_obj.size(0), x_rel.size(0)).zero_())
            map_sobj_rel.scatter_(
                0, Variable(ind_subject.contiguous().view(1, x_rel.size(0))),
                attend_score)
            map_oobj_rel = Variable(
                x_obj.data.new(x_obj.size(0), x_rel.size(0)).zero_())
            map_oobj_rel.scatter_(
                0, Variable(ind_object.contiguous().view(1, x_rel.size(0))),
                attend_score)
            map_obj_rel = torch.stack((map_sobj_rel, map_oobj_rel), 1)

            if cfg.MUTE_RELATIONS:
                map_obj_rel.data.zero_()
                x_rel = x_rel.detach()

            mat_phrase = Variable(torch.stack((ind_subject, ind_object), 1))

            # map_obj_rel = Variable(map_obj_rel)

            # x_obj = F.relu(self.fc4obj(x_obj))
            # x_att = F.relu(self.fc4att(x_att))
            # x_pred = F.relu(self.fc4rel(x_pred))
            for i in range(cfg.GCN_LAYERS):
                # pass graph representation to gcn
                x_obj, x_rel = self.imp(x_obj, x_rel, map_obj_rel, mat_phrase)

            # pdb.set_trace()
            # compute object classification loss
            obj_cls_score = self.RCNN_obj_cls_score(x_obj)
            obj_cls_prob = F.softmax(obj_cls_score, dim=1)

            # compute attribute classification loss
            att_cls_score = self.RCNN_att_cls_score(x_att)
            att_cls_prob = F.softmax(att_cls_score, dim=1)
            att_cls_log_prob = F.log_softmax(att_cls_score, dim=1)

            # compute relation classification loss
            # x_sobj = x_obj[ind_subject]
            # x_oobj = x_obj[ind_object]
            x_rel = x_pred  # torch.cat((x_sobj, x_pred, x_oobj), 1)
            rel_cls_score = self.RCNN_rel_cls_score(x_rel)
            rel_cls_prob = F.softmax(rel_cls_score, dim=1)

        self.RCNN_loss_bbox = 0
        self.RCNN_loss_obj_cls = 0
        self.RCNN_loss_att_cls = 0
        self.RCNN_loss_rel_cls = 0

        if self.training:

            self.fg_cnt = torch.sum(rois_obj_label.data.ne(0))
            self.bg_cnt = rois_obj_label.data.numel() - self.fg_cnt
            self.RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                  rois_inside_ws,
                                                  rois_outside_ws)

            # object classification loss
            obj_label = rois_obj_label.long()
            self.RCNN_loss_obj_cls = F.cross_entropy(obj_cls_score, obj_label)

            # attribute classification loss
            if cfg.HAS_ATTRIBUTES:
                att_label = rois_att_label
                att_label = att_label[rois_obj_label.data.nonzero().squeeze()]
                # att_cls_score = att_cls_score[rois_obj_label.data.nonzero().squeeze()]
                # self.RCNN_loss_att_cls = F.multilabel_soft_margin_loss(att_cls_score, att_label)
                att_cls_log_prob = att_cls_log_prob[
                    rois_obj_label.data.nonzero().squeeze()]
                self.RCNN_loss_att_cls = _softmax_with_loss(
                    att_cls_log_prob, att_label)

            if cfg.HAS_RELATIONS:
                self.rel_fg_cnt = torch.sum(rois_rel_label.data.ne(0))
                self.rel_bg_cnt = rois_rel_label.data.numel() - self.rel_fg_cnt

                # ce_weights = rel_cls_score.data.new(rel_cls_score.size(1)).fill_(1)
                # ce_weights[0] = float(self.rel_bg_cnt) / (rois_rel_label.data.numel() + 1e-5)
                # ce_weights = ce_weights
                rel_label = rois_rel_label.long()
                self.RCNN_loss_rel_cls = F.cross_entropy(
                    rel_cls_score, rel_label)

        rcnn_loss = self.RCNN_loss_bbox + self.RCNN_loss_obj_cls

        if cfg.HAS_ATTRIBUTES and not cfg.MUTE_ATTRIBUTES:
            rcnn_loss += cfg.WEIGHT_ATTRIBUTES * self.RCNN_loss_att_cls

        if cfg.HAS_RELATIONS and not cfg.MUTE_RELATIONS:
            rcnn_loss += cfg.WEIGHT_RELATIONS * self.RCNN_loss_rel_cls

        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        obj_cls_prob = obj_cls_prob.view(batch_size, rois.size(1), -1)
        att_cls_prob = None if not cfg.HAS_ATTRIBUTES else att_cls_prob.view(
            batch_size, rois.size(1), -1)
        rel_cls_prob = None if not cfg.HAS_RELATIONS else rel_cls_prob.view(
            batch_size, rel_cls_prob.size(0) // batch_size, -1)

        if self.ext_feat:
            rel_pairs = roi_pair_proposals
            return base_feat, rois.data, rel_pairs, bbox_pred.data, x_obj.data, x_att.data, x_rel.data, \
                    obj_cls_prob.data, att_cls_prob.data, rel_cls_prob.data, \
                    obj_cls_score.data, att_cls_score.data, rel_cls_score.data

        if cfg.HAS_ATTRIBUTES and cfg.HAS_RELATIONS:
            if self.training:
                return rois, bbox_pred, obj_cls_prob, att_cls_prob, rel_cls_prob, rpn_loss, rcnn_loss
            else:
                rel_pairs = roi_pair_proposals
                return rois, rel_pairs, bbox_pred, obj_cls_prob, att_cls_prob, rel_cls_prob, rpn_loss, rcnn_loss
        elif cfg.HAS_ATTRIBUTES:
            return rois, bbox_pred, obj_cls_prob, att_cls_prob, rpn_loss, rcnn_loss
        else:
            return rois, bbox_pred, obj_cls_prob, rpn_loss, rcnn_loss
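
The map_obj_obj construction in the GCN branch above amounts to a block-diagonal matrix of ones (one block per image in the batch) with the diagonal zeroed, so each object is connected to every other object from the same image but not to itself or to objects from other images. A small numeric check of that structure:

import torch

batch_size, size_per_batch = 2, 3
n = batch_size * size_per_batch
map_obj_obj = torch.zeros(n, n)
eye_mat = torch.eye(size_per_batch)
for i in range(batch_size):
    sl = slice(i * size_per_batch, (i + 1) * size_per_batch)
    map_obj_obj[sl, sl] = 1.0 - eye_mat  # ones block minus the diagonal
print(map_obj_obj)
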
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # update 20191026: get the index of nodes in the graph for rois (default: batch_size = 1)
        # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
        # roi_part_match[0], roi_part_match_overlap[0], and so on

        if True:

            iou_threshold = 0.8
            dis_threshold = 0.2

            # first, calculate the overlaps among rois; set weights on edges between nodes with iou >= iou_threshold to 1
            overlaps = bbox_overlaps_batch(rois, rois)
            overlaps_bin = overlaps.cpu().data.numpy().copy()

            _, N_node, _ = overlaps.shape

            overlaps_bin1 = torch.unsqueeze(torch.eye(N_node, N_node).cuda(),
                                            dim=0)
            overlaps_bin1[overlaps >= iou_threshold] = 1
            overlaps_bin1[overlaps < iou_threshold] = 0

            for j in range(N_node):
                for k in range(N_node):
                    if overlaps_bin[0][j][k] >= iou_threshold:
                        overlaps_bin[0][j][k] = 1
                    else:
                        overlaps_bin[0][j][k] = 0
                    if k == j:
                        overlaps_bin[0][j][k] = 0

            # second, calculate the distances among rois; set weights on edges between nodes with distance <= dis_threshold to 1
            distances = bbox_distances_batch(rois, rois)
            distances_bin = distances.cpu().data.numpy().copy()

            for j in range(N_node):
                for k in range(N_node):
                    if distances_bin[0][j][k] <= dis_threshold:
                        distances_bin[0][j][k] = 1
                    else:
                        distances_bin[0][j][k] = 0
                    if k == j:
                        distances_bin[0][j][k] = 0

            #adj_matrix_bin = overlaps_bin + distances_bin

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        dot_product_mat = torch.mm(pooled_feat,
                                   torch.transpose(pooled_feat, 0, 1))
        len_vec = torch.unsqueeze(torch.sqrt(
            torch.sum(pooled_feat * pooled_feat, dim=1)),
                                  dim=0)
        len_mat = torch.mm(torch.transpose(len_vec, 0, 1), len_vec)
        pooled_feat_sim_mat = dot_product_mat / len_mat  # (currently unused below)


        # update 20191027: build graph for rois based on index (default: batch_size = 1)
        part_size = 10
        relation_size = 5
        if True:
            cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

            # calculate the adj_mat based on adj_matrix_bin, the weights on edges are the cosine distance between nodes
            adj_matrix = np.zeros((N_node, N_node))

            for s in range(N_node):
                row_idx = list(range(N_node))
                random.shuffle(row_idx)
                part_cnt = 0
                relation_cnt = 0
                for t in row_idx:
                    if part_cnt <= part_size:
                        if overlaps_bin[0, s, t] == 1:
                            node_feat_s = pooled_feat[s, :]
                            node_feat_t = pooled_feat[t, :]
                            adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                            part_cnt = part_cnt + 1
                            continue
                    # relation (distance-based) edges are currently disabled:
                    # if relation_cnt <= relation_size:
                    #     if distances_bin[0, s, t] == 1:
                    #         node_feat_s = pooled_feat[s, :]
                    #         node_feat_t = pooled_feat[t, :]
                    #         adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                    #         relation_cnt = relation_cnt + 1
                    #         continue

                    # if part_cnt > part_size and relation_cnt > relation_size:
                    #     break
                    if part_cnt > part_size:
                        break

            adj_matrix = torch.from_numpy(adj_matrix).float().cuda()

            pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
            pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # update 2019-6-17:fix the bug for dimension specified as 0...
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
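
The adjacency construction above computes cosine similarities one pair at a time inside a Python loop. For reference, the same edge weights can be computed in a single matrix product; a sketch assuming pooled_feat is an (N, D) matrix and mask is the binary (N, N) connectivity (this does not replicate the random part_size capping):

import torch
import torch.nn.functional as F

def cosine_adjacency(pooled_feat, mask):
    normed = F.normalize(pooled_feat, p=2, dim=1)  # unit-length rows
    sim = normed @ normed.t()                      # all pairwise cosines at once
    return sim * mask                              # keep only the selected edges

feats = torch.randn(5, 8)
mask = (torch.rand(5, 5) > 0.5).float()
mask.fill_diagonal_(0)  # no self-loops
print(cosine_adjacency(feats, mask))
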
Example 6
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        if self.K > 1 and self.training:
            # check that we have the same number of GT boxes for all images in the stack
            _cmp = torch.nonzero((gt_boxes[0] != 0).sum(1)).numel()
            for k in range(1, self.K):
                assert torch.nonzero((gt_boxes[k] != 0).sum(1)).numel() == _cmp
                assert num_boxes[0] == num_boxes[k]

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)  # size [nBatch, nChan, H, W], usually nChan = 1024

        # feed base feature map to RPN to obtain rois; rois size: [nBatch, numTopProps, 1+4]
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            # rois size: [nBatch, numTopTrain]; rois_target and weights sizes: [nBatch, numTopTrain, 4]
            # usually numTopTrain = 200
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))

        # feed pooled features to top model; e.g. in resnet this is the 4th block followed by average
        # pooling over the spatial dimensions
        # sizes: [nBatch x nRoi, nChan, poolSize, poolSize] --> [nBatch x nRoi, nOutChan]
        # usually poolSize = 7 and nOutChan = 2048
        pooled_feat = self._head_to_tail(pooled_feat)

        if self.K > 1:
            # stack channels from all images, making nBatch = 1
            nrois = rois.size(1)
            _, nc = pooled_feat.shape
            pooled_feat = pooled_feat.view(self.K, nrois, nc)  # [nBatch, nRoi, nOutChan]
            pooled_feat.transpose_(0, 1)
            pooled_feat = pooled_feat.contiguous().view(nrois, -1)  # [nRoi, nBatch x nOutChan]

        # compute bbox offset
        # if not class agnostic: bbox_pred size [nBatch x nRoi, 4 x nClasses], where nClasses includes bkg
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.K > 1:
            # reshape in the expected order
            bbox_pred = bbox_pred.view(nrois, self.K, self.n_classes * 4)
            bbox_pred = bbox_pred.transpose(0, 1)  # not in-place, to avoid an autograd error
            bbox_pred = bbox_pred.contiguous().view(nrois * self.K, -1)

        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability of size: [nBatch x nRoi, nClasses]
        cls_score = self.RCNN_cls_score(pooled_feat)
        if self.K > 1:
            # we predict only one score for the whole stack, replicate it
            cls_score.unsqueeze_(0)
            cls_score = cls_score.repeat(self.K, 1, 1)
            cls_score = cls_score.view(-1, self.n_classes)

        cls_prob = F.softmax(cls_score, dim=1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)  # [nBatch, nRoi, nClasses]
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)  # [nBatch, nRoi, 4 or 4 x nClasses]

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
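
The K > 1 branches above regroup per-image ROI features so that each ROI row concatenates the channels of all K images in the stack. A shape-only walkthrough with made-up sizes:

import torch

K, nrois, nc = 3, 4, 8
pooled_feat = torch.randn(K * nrois, nc)         # [K x nRoi, nOutChan]
pooled_feat = pooled_feat.view(K, nrois, nc)     # [K, nRoi, nOutChan]
pooled_feat = pooled_feat.transpose(0, 1)        # [nRoi, K, nOutChan]
pooled_feat = pooled_feat.contiguous().view(nrois, -1)
print(pooled_feat.shape)                         # torch.Size([4, 24])
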
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)
        # feed base feature map to RPN to obtain rois
        # TODO: modified, now also returns Ps and Rs (rpn_cls_score and rpn_bbox_pred)
        rois, rpn_loss_cls, rpn_loss_bbox, rpn_cls_score, rpn_bbox_pred, fg_bg_label, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining

        if self.training or self.teaching:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:

            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            # rpn_loss_bbox = 0
        '''
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        '''

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if (self.training and not self.class_agnostic) or self.teaching:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0
        # TODO: here too, the losses L_hard for classification and L_s for regression are already computed
        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        if not (self.training or self.teaching):
            bbox_pred = bbox_pred.view(
                batch_size, rois.size(1),
                -1)  # reshape skipped during training/teaching so the loss can be computed externally

        # rpn_bbox_inside_weights (1,36,37,56); same shape as the outside weights
        RPN_mask = rpn_bbox_inside_weights, rpn_bbox_outside_weights

        # rpn_bbox_targets (1,36,37,56): 4 coordinates * 9 anchors for each element of the feature map
        # rpn_bbox_pred (1,36,37,56)
        # rpn_loss_box (int):
        RPN_reg = rpn_bbox_targets, rpn_bbox_pred, rpn_loss_bbox

        # rpn_cls_score (256,2): logits from the convolutional layer, without softmax applied in the RPN; the probabilities are computed with softmax in loss.py
        # fg_bg_label (256 values of 0/1): ground-truth background/foreground
        # rpn_loss_cls (int)
        RPN_cls = rpn_cls_score, fg_bg_label, rpn_loss_cls

        # rois_inside_ws (256,4); same shape as rois_outside_ws
        RCN_mask = rois_inside_ws, rois_outside_ws

        # rois (1,256,5): regions of interest generated by the proposal layer (256)
        # rois_label (256)
        # bbox_pred (256,4)
        # rois_target (256,4)
        # RCNN_loss_bbox (int)
        RCN_reg = rois, rois_label, rois_target, bbox_pred, RCNN_loss_bbox

        # cls_score (256,21)
        # cls_prob (1,256,21)
        # RCNN_loss_cls(int)
        RCN_cls = cls_score, cls_prob, RCNN_loss_cls

        ### Losses:

        # RPN classification loss: rpn_loss_cls
        # RPN regression loss: rpn_loss_bbox

        # RCN classification loss: RCNN_loss_cls
        # RCN regression loss: RCNN_loss_bbox

        return RPN_mask, RPN_reg, RPN_cls, RCN_mask, RCN_reg, RCN_cls
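
Unlike the other variants, this forward returns its outputs grouped into tuples so the losses can be recomputed externally (e.g. for the teaching setup mentioned in the comments). A sketch of how a caller might unpack them; the zero tensors below are dummy stand-ins for a real forward() call:

import torch

zero = torch.zeros(1)
RPN_reg = (torch.zeros(1, 36, 37, 56), torch.zeros(1, 36, 37, 56), zero)
RPN_cls = (torch.zeros(256, 2), torch.zeros(256).long(), zero)
RCN_reg = (torch.zeros(1, 256, 5), torch.zeros(256).long(),
           torch.zeros(256, 4), torch.zeros(256, 4), zero)
RCN_cls = (torch.zeros(256, 21), torch.zeros(1, 256, 21), zero)

rpn_bbox_targets, rpn_bbox_pred, rpn_loss_bbox = RPN_reg
rpn_cls_score, fg_bg_label, rpn_loss_cls = RPN_cls
rois, rois_label, rois_target, bbox_pred, RCNN_loss_bbox = RCN_reg
cls_score, cls_prob, RCNN_loss_cls = RCN_cls
total_loss = rpn_loss_cls + rpn_loss_bbox + RCNN_loss_cls + RCNN_loss_bbox
print(total_loss)  # tensor([0.])
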
Example 8
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_label = Variable(rois_label.view(-1).long())

            # TODO

            rois_main_label = Variable(rois_label.view(-1).long())
            rois_sub_class = list(map(
                lambda x: self.sub_classes[x], rois_main_label))
            rois_main_class = list(
                map(lambda x: sub2main_dict[x], rois_sub_class))
            rois_main_label = list(map(
                lambda x: self.main_classes.index(x), rois_main_class))
            rois_main_label = torch.cuda.LongTensor(rois_main_label)
            rois_main_label = Variable(rois_main_label)

            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_main_label = None
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        # return roi_data
        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(
                rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(
                base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        elif cfg.POOLING_MODE == 'pspool':
            # NOTE: currently falls back to plain RoI pooling
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # main Rcnn branch
        # feed pooled features to top model
        pooled_feat_main = self._head_to_tail_main(pooled_feat)

        nongt_dim = 300 if self.training else cfg.TEST.RPN_POST_NMS_TOP_N

        position_matrix = self.extract_position_matrix(
            rois.view(-1, 5)[:, :4].clone(), nongt_dim=nongt_dim)
        position_embedding = self.extract_position_embedding(
            position_matrix, feat_dim=64)

        pooled_feat_main = self.fc1(pooled_feat_main)
        attention_feat_1 = self.attention_1(
            pooled_feat_main, position_embedding)
        pooled_feat_main = pooled_feat_main + attention_feat_1
        pooled_feat_main = self.fc2(pooled_feat_main)
        attention_feat_2 = self.attention_2(pooled_feat_main, position_embedding)
        pooled_feat_main = pooled_feat_main + attention_feat_2

        # compute bbox offset
        bbox_pred_main = self.RCNN_bbox_pred_main(pooled_feat_main)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view_main = bbox_pred_main.view(
                bbox_pred_main.size(0), int(bbox_pred_main.size(1) / 4), 4)
            bbox_pred_select_main = torch.gather(bbox_pred_view_main, 1, rois_main_label.view(
                rois_main_label.size(0), 1, 1).expand(rois_main_label.size(0), 1, 4))
            bbox_pred_main = bbox_pred_select_main.squeeze(1)

        # compute object classification probability
        cls_score_main = self.RCNN_cls_score_main(pooled_feat_main)
        cls_prob_main = F.softmax(cls_score_main, 1)

        # sub Rcnn branch

        pooled_feat_sub = self._head_to_tail_sub(pooled_feat)

        #nongt_dim = 300 if self.training else cfg.TEST.RPN_POST_NMS_TOP_N

        # position_matrix = self.extract_position_matrix(
        #    rois.view(-1, 5)[:, :4].clone(), nongt_dim=nongt_dim)
        # position_embedding = self.extract_position_embedding(
        #    position_matrix, feat_dim=64)

        pooled_feat_sub = self.fc1(pooled_feat_sub)
        attention_feat_1_sub = self.attention_1(
            pooled_feat_sub, position_embedding)
        pooled_feat_sub = pooled_feat_sub + attention_feat_1_sub
        pooled_feat_sub = self.fc2(pooled_feat_sub)
        attention_feat_2_sub = self.attention_2(
            pooled_feat_sub, position_embedding)
        pooled_feat_sub = pooled_feat_sub + attention_feat_2_sub

        bbox_pred_sub = self.RCNN_bbox_pred_sub(pooled_feat_sub)
        if self.training and not self.class_agnostic:
            bbox_pred_view_sub = bbox_pred_sub.view(
                bbox_pred_sub.size(0), int(bbox_pred_sub.size(1) / 4), 4)
            bbox_pred_select_sub = torch.gather(bbox_pred_view_sub, 1, rois_label.view(
                rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred_sub = bbox_pred_select_sub.squeeze(1)

        cls_score_sub = self.RCNN_cls_score_sub(pooled_feat_sub)

        # pdb.set_trace()
        # process weight of main classes to sub score
        if 'score' in self.casecade_type:
            main_cls_weight = torch.cuda.FloatTensor(
                cls_score_main.size()[0], len(self.sub_classes))
            for key, val in self.main2sub_idx_dict.items():
                for column_idx in val:
                    main_cls_weight[:, column_idx] = cls_score_main[:, key]
            if self.casecade_type == 'add_score':
                cls_score_sub += main_cls_weight
            elif self.casecade_type == 'mul_score':
                cls_score_sub *= main_cls_weight

        cls_prob_sub = F.softmax(cls_score_sub, 1)

        # process weight of main classes to sub prob
        if 'prob' in self.casecade_type:
            main_cls_weight = torch.cuda.FloatTensor(
                cls_prob_main.size()[0], len(self.sub_classes))
            for key, val in self.main2sub_idx_dict.items():
                for column_idx in val:
                    main_cls_weight[:, column_idx] = cls_prob_main[:, key]
            if self.casecade_type == 'add_prob':
                # TODO normalized
                cls_prob_sub = cls_prob_sub * self.alpha + \
                    (1-self.alpha) * main_cls_weight

        RCNN_loss_cls_main = 0
        RCNN_loss_bbox_main = 0

        RCNN_loss_cls_sub = 0
        RCNN_loss_bbox_sub = 0

        if self.training:
            # classification loss
            RCNN_loss_cls_main = F.cross_entropy(
                cls_score_main, rois_main_label)

            # TODO: rois_label should ...
            RCNN_loss_cls_sub = F.cross_entropy(cls_score_sub, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox_main = _smooth_l1_loss(
                bbox_pred_main, rois_target, rois_inside_ws, rois_outside_ws)
            RCNN_loss_bbox_sub = _smooth_l1_loss(
                bbox_pred_sub, rois_target, rois_inside_ws, rois_outside_ws)

        cls_prob_main = cls_prob_main.view(batch_size, rois.size(1), -1)
        bbox_pred_main = bbox_pred_main.view(batch_size, rois.size(1), -1)

        cls_prob_sub = cls_prob_sub.view(batch_size, rois.size(1), -1)
        bbox_pred_sub = bbox_pred_sub.view(batch_size, rois.size(1), -1)

        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls_main = torch.unsqueeze(RCNN_loss_cls_main, 0)
            RCNN_loss_bbox_main = torch.unsqueeze(RCNN_loss_bbox_main, 0)
            RCNN_loss_cls_sub = torch.unsqueeze(RCNN_loss_cls_sub, 0)
            RCNN_loss_bbox_sub = torch.unsqueeze(RCNN_loss_bbox_sub, 0)

        return rois, cls_prob_main, bbox_pred_main, cls_prob_sub, bbox_pred_sub, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls_sub, RCNN_loss_bbox_sub, RCNN_loss_cls_main, RCNN_loss_bbox_main, rois_label
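
The 'score'/'prob' cascade above broadcasts each main-class score into the columns of its sub-classes before adding or multiplying. A toy illustration with a hypothetical main2sub_idx_dict:

import torch

main2sub_idx_dict = {0: [0], 1: [1, 2], 2: [3, 4]}  # main column -> sub columns
cls_score_main = torch.tensor([[0.1, 2.0, -1.0]])   # (N, n_main_classes)
n_sub_classes = 5
main_cls_weight = torch.empty(cls_score_main.size(0), n_sub_classes)
for key, val in main2sub_idx_dict.items():
    for column_idx in val:
        main_cls_weight[:, column_idx] = cls_score_main[:, key]
print(main_cls_weight)  # tensor([[ 0.1000,  2.0000,  2.0000, -1.0000, -1.0000]])
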
Example no. 9
    def forward(self, im_data, im_info, gt_boxes, num_boxes, pooling_size):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed the base feature map to the RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        if not self.training:
            # rois = tweak_rois(rois)
            rois = select_rois(rois, base_feat)
            features = []
            prev_feat = im_data
            for i, module in enumerate(self.RCNN_base._modules.values()):
                next_feat = module(prev_feat)
                features.append(next_feat)
                prev_feat = next_feat

            features = [features[i] for i in self.interested_modules]

            popout_rois = np.zeros((1, 4), dtype="float32")  # placeholder row, dropped below
            for iF in features:
                base_feat = iF
                # import pdb; pdb.set_trace()
                feature_width = base_feat.size()[2]
                self.RCNN_roi_pool = _RoIPooling(
                    pooling_size, pooling_size,
                    1.0 / (im_info[0][0] / feature_width))
                self.RCNN_roi_align = RoIAlignAvg(
                    pooling_size, pooling_size,
                    1.0 / (im_info[0][0] / feature_width))
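                # (editor's note) the spatial scale passed above equals
                # feature_width / im_info[0][0], i.e. the downsampling ratio of
                # this feature level (note feature_width actually holds size(2),
                # the feature map height).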

                # do roi pooling based on predicted rois
                if cfg.POOLING_MODE == 'crop':
                    # pdb.set_trace()
                    # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
                    grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                               base_feat.size()[2:],
                                               self.grid_size)
                    grid_yx = torch.stack(
                        [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                        3).contiguous()
                    pooled_feat = self.RCNN_roi_crop(
                        base_feat,
                        Variable(grid_yx).detach())
                    if cfg.CROP_RESIZE_WITH_MAX_POOL:
                        pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
                elif cfg.POOLING_MODE == 'align':
                    pooled_feat = self.RCNN_roi_align(base_feat,
                                                      rois.view(-1, 5))
                elif cfg.POOLING_MODE == 'pool':
                    pooled_feat = self.RCNN_roi_pool(base_feat,
                                                     rois.view(-1, 5))

                pooled_feat = pooled_feat.view(pooled_feat.shape[0], -1)
                popout_index = find_the_popout(pooled_feat)
                popout_rois = np.vstack(
                    (popout_rois,
                     rois[0, popout_index.item(), 1:5].cpu().numpy()))
            popout_rois = popout_rois[1:, :]
            rois = rois[0, :, 1:].cpu().numpy()
            return rois, popout_rois
        else:

            # if it is the training phase, use ground-truth bboxes for refining

            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))

            # do roi pooling based on predicted rois
            if cfg.POOLING_MODE == 'crop':
                # pdb.set_trace()
                # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
                grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                           base_feat.size()[2:],
                                           self.grid_size)
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                    3).contiguous()
                pooled_feat = self.RCNN_roi_crop(base_feat,
                                                 Variable(grid_yx).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
            elif cfg.POOLING_MODE == 'align':
                pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
            elif cfg.POOLING_MODE == 'pool':
                pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

            # feed pooled features to top model
            pooled_feat = self._head_to_tail(pooled_feat)

            # compute bbox offset
            bbox_pred = self.RCNN_bbox_pred(pooled_feat)
            if self.training and not self.class_agnostic:
                # select the corresponding columns according to roi labels
                bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                                int(bbox_pred.size(1) / 4), 4)
                bbox_pred_select = torch.gather(
                    bbox_pred_view, 1,
                    rois_label.view(rois_label.size(0), 1,
                                    1).expand(rois_label.size(0), 1, 4))
                bbox_pred = bbox_pred_select.squeeze(1)

            # compute object classification probability
            cls_score = self.RCNN_cls_score(pooled_feat)
            cls_prob = F.softmax(cls_score, 1)

            RCNN_loss_cls = 0
            RCNN_loss_bbox = 0

            if self.training:
                # classification loss
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

                # bounding box regression L1 loss
                RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                 rois_inside_ws,
                                                 rois_outside_ws)

                # added by Cindy based on https://github.com/jwyang/faster-rcnn.pytorch/issues/226
                rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
                rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
                RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
                RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

            cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
            bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
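
The class-specific bounding-box regression above (and in several later snippets) uses torch.gather to pick, for each RoI, the four deltas belonging to its assigned label. A minimal self-contained sketch of that selection; the counts N and C are made up for illustration:

    import torch

    N, C = 8, 21                                  # RoIs, classes (hypothetical)
    bbox_pred = torch.randn(N, 4 * C)             # flattened per-class deltas
    rois_label = torch.randint(0, C, (N,))        # one class index per RoI
    bbox_pred_view = bbox_pred.view(N, C, 4)
    idx = rois_label.view(N, 1, 1).expand(N, 1, 4)
    bbox_pred_sel = torch.gather(bbox_pred_view, 1, idx).squeeze(1)
    assert bbox_pred_sel.shape == (N, 4)          # 4 deltas for each RoI's label
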
Example no. 10
    def forward(self, im_data, rois, im_info, labels=None, num_boxes=None):
        batch_size = im_data.size(0)
        num_rois = rois.size(0)

        # init_rois = rois.data
        if self.training:  # for multi-GPU
            try:
                nb = int(num_boxes[:, 0].item())
            except Exception:
                # num_boxes may arrive as a scalar tensor
                nb = int(num_boxes.item())
            num_boxes = num_boxes.data
            # ret_prob = rois.new().new_zeros(1,rois.size(1),21)
            rois = rois[:, :nb]
            axis1 = int(num_boxes[:, 1].item())
            axis2 = int(num_boxes[:, 2].item())
            im_data = im_data[:, :, :axis1, :axis2]
            # im_data_for_aug = im_data.clone()
            num_boxes = nb
            # feed image data to base model to obtain base feature map
        else:
            num_boxes = num_rois
        base_feat = self.OICR_base(im_data)
        rois = Variable(rois)
        # do roi pooling based on predicted rois

        cfg.POOLING_MODE = 'pool'
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.OICR_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.OICR_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.OICR_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        fc7 = self._head_to_tail(pooled_feat)  # fc7

        ic_score = self.ic_score(fc7).view(batch_size, num_boxes,
                                           self.n_classes + 1)
        ic_score1 = self.ic_score1(fc7).view(batch_size, num_boxes,
                                             self.n_classes + 1)
        ic_score2 = self.ic_score2(fc7).view(batch_size, num_boxes,
                                             self.n_classes + 1)
        self.ic_prob = F.softmax(ic_score, dim=2)
        self.ic_prob1 = F.softmax(ic_score1, dim=2)
        self.ic_prob2 = F.softmax(ic_score2, dim=2)
        # loss_midn=loss_oicr=loss_oicr1=loss_oicr2=0

        self.midn_prob0 = self.midn_score0(fc7).view(batch_size, num_boxes,
                                                     self.n_classes)
        self.midn_prob1 = self.midn_score1(fc7).view(batch_size, num_boxes,
                                                     self.n_classes)
        self.midn_prob0 = F.softmax(self.midn_prob0, dim=1)  # rois
        self.midn_prob1 = F.softmax(self.midn_prob1, dim=2)  # class
        self.midn_prob = self.midn_prob0 * self.midn_prob1
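        # (editor's note) two-stream MIDN head (WSDDN-style): softmax over dim=1
        # ranks proposals within each class while softmax over dim=2 ranks
        # classes within each proposal; their elementwise product is the
        # per-RoI, per-class evidence that is sum-pooled over proposals into an
        # image-level prediction during training.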
        if self.training:
            labels = labels.data
            if torch.isnan(fc7).sum() > 0 or torch.isnan(
                    self.midn_score0.weight.data).sum() > 0:
                pdb.set_trace()

            ### RPN ###
            # use rois_augment to choose pseudo gt
            gt_RPN, pos_samples = choose_gt(rois, self.midn_prob.clone(),
                                            labels)
            pgt_num_boxes = gt_RPN.shape[0]

            # use pseudo gt to generate rois_RPN
            rois_RPN, rpn_loss_cls, rpn_loss_bbox = self.OICR_rpn(
                base_feat, im_info, gt_RPN, pgt_num_boxes)
            rois_together = torch.cat(
                (pos_samples, rois_RPN),
                1)  # use rois_augment and rois_RPN together
            init_rois = rois_together.data
            ret_prob = rois_together.new_zeros(1, rois_together.size(1), 21)

            num_boxes_tog = rois_together.shape[1]
            pooled_feat = self.OICR_roi_pool(base_feat,
                                             rois_together.view(-1, 5))
            fc7 = self._head_to_tail(pooled_feat)

            ic_score = self.ic_score(fc7).view(batch_size, num_boxes_tog,
                                               self.n_classes + 1)
            ic_score1 = self.ic_score1(fc7).view(batch_size, num_boxes_tog,
                                                 self.n_classes + 1)
            ic_score2 = self.ic_score2(fc7).view(batch_size, num_boxes_tog,
                                                 self.n_classes + 1)
            self.ic_prob = F.softmax(ic_score, dim=2)
            self.ic_prob1 = F.softmax(ic_score1, dim=2)
            self.ic_prob2 = F.softmax(ic_score2, dim=2)

            self.midn_prob0 = self.midn_score0(fc7).view(
                batch_size, num_boxes_tog, self.n_classes)
            self.midn_prob1 = self.midn_score1(fc7).view(
                batch_size, num_boxes_tog, self.n_classes)
            self.midn_prob0 = F.softmax(self.midn_prob0, dim=1)  # rois
            self.midn_prob1 = F.softmax(self.midn_prob1, dim=2)  # class
            self.midn_prob = self.midn_prob0 * self.midn_prob1

            self.global_pool = self.midn_prob.sum(dim=1, keepdim=True)
            self.global_pool = self.global_pool.view(batch_size,
                                                     self.n_classes)
            loss_midn = multi_class_cross_entropy_loss(self.global_pool,
                                                       labels)

            ### end ###

            label_ic, cls_loss_weights = OICRLayer(rois_together,
                                                   self.midn_prob.clone(),
                                                   labels)
            label_ic1, cls_loss_weights1 = OICRLayer(rois_together,
                                                     self.ic_prob.clone(),
                                                     labels)
            label_ic2, cls_loss_weights2 = OICRLayer(rois_together,
                                                     self.ic_prob1.clone(),
                                                     labels)

            if torch.isnan(self.ic_prob).sum().data > 0 or torch.isnan(
                    self.ic_prob1).sum().data > 0 or torch.isnan(
                        self.ic_prob2).sum().data > 0:
                pdb.set_trace()

            label_ic = torch.FloatTensor(label_ic).cuda().detach()
            label_ic1 = torch.FloatTensor(label_ic1).cuda().detach()
            label_ic2 = torch.FloatTensor(label_ic2).cuda().detach()
            cls_loss_weights = torch.tensor(cls_loss_weights).cuda().detach()
            cls_loss_weights1 = torch.tensor(cls_loss_weights1).cuda().detach()
            cls_loss_weights2 = torch.tensor(cls_loss_weights2).cuda().detach()

            loss_oicr = WeightedSoftmaxWithLoss(self.ic_prob, label_ic,
                                                cls_loss_weights)
            loss_oicr1 = WeightedSoftmaxWithLoss(self.ic_prob1, label_ic1,
                                                 cls_loss_weights1)
            loss_oicr2 = WeightedSoftmaxWithLoss(self.ic_prob2, label_ic2,
                                                 cls_loss_weights2)

            # oicr_loss = loss_oicr + loss_oicr1 + loss_oicr2
            ret_prob[:, :num_boxes_tog] = (self.ic_prob + self.ic_prob1 +
                                           self.ic_prob2) / 3
            return init_rois, loss_midn.view(1), loss_oicr.view(
                1), loss_oicr1.view(1), loss_oicr2.view(
                    1), ret_prob, rpn_loss_cls, rpn_loss_bbox
        else:
            return self.ic_prob, self.ic_prob1, self.ic_prob2
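
The image-level (MIDN) loss in this example sums the per-RoI evidence over all proposals and scores it against the image labels. A minimal sketch of that pooling, assuming multi_class_cross_entropy_loss behaves like a per-class binary cross-entropy; all names and shapes here are illustrative:

    import torch
    import torch.nn.functional as F

    B, R, C = 1, 128, 20                          # batch, RoIs, classes
    midn_prob = torch.rand(B, R, C) / R           # per-RoI, per-class evidence
    labels = torch.randint(0, 2, (B, C)).float()  # multi-hot image labels
    global_pool = midn_prob.sum(dim=1).clamp(1e-6, 1 - 1e-6)
    loss_midn = F.binary_cross_entropy(global_pool, labels)
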
Example no. 11
    def forward(self, im_data, im_info, gt_boxes, num_boxes, tgt_im_data,
                tgt_im_info, tgt_gt_boxes, tgt_num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        tgt_im_info = tgt_im_info.data
        tgt_gt_boxes = tgt_gt_boxes.data
        tgt_num_boxes = tgt_num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)
        tgt_base_feat = self.RCNN_base(tgt_im_data)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:

            # feed the base feature map to the RPN to obtain rois
            rois, rpn_loss_cls, rpn_loss_bbox, rpn_cls_prob, rois_select = self.RCNN_rpn(
                base_feat, im_info, gt_boxes, num_boxes)
            tgt_rois, tgt_rpn_loss_cls, tgt_rpn_loss_bbox, tgt_rpn_cls_prob, tgt_rois_select = self.RCNN_rpn(
                tgt_base_feat, tgt_im_info, tgt_gt_boxes, tgt_num_boxes)

            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))

            tgt_roi_data = self.RCNN_proposal_target(tgt_rois, tgt_gt_boxes,
                                                     tgt_num_boxes)
            tgt_rois, tgt_rois_label, tgt_rois_target, tgt_rois_inside_ws, tgt_rois_outside_ws = tgt_roi_data

            tgt_rois_label = Variable(tgt_rois_label.view(-1).long())
            tgt_rois_target = Variable(
                tgt_rois_target.view(-1, tgt_rois_target.size(2)))
            tgt_rois_inside_ws = Variable(
                tgt_rois_inside_ws.view(-1, tgt_rois_inside_ws.size(2)))
            tgt_rois_outside_ws = Variable(
                tgt_rois_outside_ws.view(-1, tgt_rois_outside_ws.size(2)))
        else:

            # feed the base feature map to the RPN to obtain rois
            rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
                base_feat, im_info, gt_boxes, num_boxes)
            tgt_rois, tgt_rpn_loss_cls, tgt_rpn_loss_bbox = self.RCNN_rpn(
                tgt_base_feat, tgt_im_info, tgt_gt_boxes, tgt_num_boxes)

            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

            tgt_rois_label = None
            tgt_rois_target = None
            tgt_rois_inside_ws = None
            tgt_rois_outside_ws = None
            tgt_rpn_loss_cls = 0
            tgt_rpn_loss_bbox = 0

        rois = Variable(rois)
        tgt_rois = Variable(tgt_rois)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # for RCNN
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)

            tgt_grid_xy = _affine_grid_gen(tgt_rois.view(-1, 5),
                                           tgt_base_feat.size()[2:],
                                           self.grid_size)
            tgt_grid_yx = torch.stack(
                [tgt_grid_xy.data[:, :, :, 1], tgt_grid_xy.data[:, :, :, 0]],
                3).contiguous()
            tgt_pooled_feat = self.RCNN_roi_crop(
                tgt_base_feat,
                Variable(tgt_grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                tgt_pooled_feat = F.max_pool2d(tgt_pooled_feat, 2, 2)

            # for RPN adaptive loss
            if self.training:
                grid_xy_ = _affine_grid_gen(rois_select,
                                            base_feat.size()[2:],
                                            self.grid_size)
                grid_yx_ = torch.stack(
                    [grid_xy_.data[:, :, :, 1], grid_xy_.data[:, :, :, 0]],
                    3).contiguous()
                pooled_feat_ = self.RCNN_roi_crop(base_feat,
                                                  Variable(grid_yx_).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    pooled_feat_ = F.max_pool2d(pooled_feat_, 2, 2)

                tgt_grid_xy_ = _affine_grid_gen(tgt_rois_select,
                                                tgt_base_feat.size()[2:],
                                                self.grid_size)
                tgt_grid_yx_ = torch.stack([
                    tgt_grid_xy_.data[:, :, :, 1], tgt_grid_xy_.data[:, :, :,
                                                                     0]
                ], 3).contiguous()
                tgt_pooled_feat_ = self.RCNN_roi_crop(
                    tgt_base_feat,
                    Variable(tgt_grid_yx_).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    tgt_pooled_feat_ = F.max_pool2d(tgt_pooled_feat_, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            # for RCNN
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
            tgt_pooled_feat = self.RCNN_roi_align(tgt_base_feat,
                                                  tgt_rois.view(-1, 5))

            # for RPN adaptive loss
            if self.training:
                pooled_feat_ = self.RCNN_roi_align(base_feat, rois_select)
                tgt_pooled_feat_ = self.RCNN_roi_align(tgt_base_feat,
                                                       tgt_rois_select)
        elif cfg.POOLING_MODE == 'pool':
            # for RCNN
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
            tgt_pooled_feat = self.RCNN_roi_pool(tgt_base_feat,
                                                 tgt_rois.view(-1, 5))

            # for RPN adaptive loss
            if self.training:
                pooled_feat_ = self.RCNN_roi_pool(base_feat, rois_select)
                tgt_pooled_feat_ = self.RCNN_roi_pool(tgt_base_feat,
                                                      tgt_rois_select)

        # get the adaptive feature for RPN
        if self.training:
            rpn_adapt_feat = self.rpn_adapt_feat(
                pooled_feat_.view(pooled_feat_.size(0), -1))
            tgt_rpn_adapt_feat = self.rpn_adapt_feat(
                tgt_pooled_feat_.view(tgt_pooled_feat_.size(0), -1))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        tgt_pooled_feat = self._head_to_tail(tgt_pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        tgt_bbox_pred = self.RCNN_bbox_pred(tgt_pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

            tgt_bbox_pred_view = tgt_bbox_pred.view(
                tgt_bbox_pred.size(0), int(tgt_bbox_pred.size(1) / 4), 4)
            tgt_bbox_pred_select = torch.gather(
                tgt_bbox_pred_view, 1,
                tgt_rois_label.view(tgt_rois_label.size(0), 1,
                                    1).expand(tgt_rois_label.size(0), 1, 4))
            tgt_bbox_pred = tgt_bbox_pred_select.squeeze(1)

        # compute object classification probability
        adapt_feat = self.RCNN_adapt_feat(pooled_feat)
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        tgt_adapt_feat = self.RCNN_adapt_feat(tgt_pooled_feat)
        tgt_cls_score = self.RCNN_cls_score(tgt_pooled_feat)
        tgt_cls_prob = F.softmax(tgt_cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0
        tgt_RCNN_loss_cls = 0
        tgt_RCNN_loss_bbox = 0
        RCNN_loss_intra = 0
        RCNN_loss_inter = 0
        RPN_loss_intra = 0
        RPN_loss_inter = 0
        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            tgt_RCNN_loss_cls = F.cross_entropy(tgt_cls_score, tgt_rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)
            tgt_RCNN_loss_bbox = _smooth_l1_loss(tgt_bbox_pred,
                                                 tgt_rois_target,
                                                 tgt_rois_inside_ws,
                                                 tgt_rois_outside_ws)

            # intra-class and inter-class adaptation loss
            # pull same classes and push away different classes of source and target domains
            if self.mode == 'adapt':
                RCNN_loss_intra, RCNN_loss_inter = self.adaptive_loss(
                    adapt_feat, cls_prob, tgt_adapt_feat, tgt_cls_prob,
                    batch_size)
            # use gcn to cluster the representation of every class
            elif self.mode == 'gcn_adapt':
                RCNN_loss_intra, RCNN_loss_inter = self.gcn_adaptive_loss(
                    adapt_feat, cls_prob, rois, tgt_adapt_feat, tgt_cls_prob,
                    tgt_rois, batch_size)

            # intra-class and inter-class losses for RPN
            # pull same classes and push away different classes of source and target domains
            if self.rpn_mode == 'adapt':
                RPN_loss_intra, RPN_loss_inter = self.adaptive_loss_rpn(
                    rpn_adapt_feat, rpn_cls_prob, tgt_rpn_adapt_feat,
                    tgt_rpn_cls_prob, batch_size)
            # use gcn to cluster the representation of every class
            elif self.rpn_mode == 'gcn_adapt':
                RPN_loss_intra, RPN_loss_inter = self.gcn_adaptive_loss(
                    rpn_adapt_feat, rpn_cls_prob, rois, tgt_rpn_adapt_feat,
                    tgt_rpn_cls_prob, tgt_rois, batch_size)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        tgt_cls_prob = tgt_cls_prob.view(batch_size, tgt_rois.size(1), -1)
        tgt_bbox_pred = tgt_bbox_pred.view(batch_size, tgt_rois.size(1), -1)

        if self.training:
            return rois, tgt_rois, cls_prob, tgt_cls_prob, bbox_pred, tgt_bbox_pred, rpn_loss_cls.view(-1), tgt_rpn_loss_cls.view(-1), \
             rpn_loss_bbox.view(-1), tgt_rpn_loss_bbox.view(-1), RCNN_loss_cls.view(-1), tgt_RCNN_loss_cls.view(-1), RCNN_loss_bbox.view(-1), \
             tgt_RCNN_loss_bbox.view(-1), RCNN_loss_intra.view(-1), RCNN_loss_inter.view(-1), rois_label, tgt_rois_label, \
                   RPN_loss_intra.view(-1), RPN_loss_inter.view(-1)
        else:
            return rois, tgt_rois, cls_prob, tgt_cls_prob, bbox_pred, tgt_bbox_pred, rpn_loss_cls, tgt_rpn_loss_cls, rpn_loss_bbox, \
             tgt_rpn_loss_bbox, RCNN_loss_cls, tgt_RCNN_loss_cls, RCNN_loss_bbox, tgt_RCNN_loss_bbox, \
             RCNN_loss_intra, RCNN_loss_inter, rois_label, tgt_rois_label, RPN_loss_intra, RPN_loss_inter
Example no. 12
    def forward(self, im_data, im_info, gt_boxes, gt_boxes_sens, num_boxes):
        batch_size = im_data[0].size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        gt_boxes_sens = gt_boxes_sens.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat_c = self.RCNN_base_c(im_data[0])
        base_feat_t = self.RCNN_base_t(im_data[1])
        base_feat_fused = 0.5 * (base_feat_c + base_feat_t)
        base_feat_fused = self.RCNN_base_fused(base_feat_fused)
        conv5_c = self.RCNN_base_f1(base_feat_c)
        conv5_t = self.RCNN_base_f2(base_feat_t)

        # feed fused base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat_fused, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            # 50% jitter probability
            if np.random.rand(1)[0]>0.5:
                jitter = (torch.randn(1,256,4)/20).cuda()
            else:
                jitter = (torch.zeros(1,256,4)).cuda()
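            # (editor's note) half the time the 256 proposals get Gaussian noise
            # (std 1/20) on their four coordinates; the zero branch keeps the
            # identity case represented, and rois_align_target below presumably
            # encodes the offset the alignment branch must regress away.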
            # feed jitter to obtain rois_align_target
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, gt_boxes_sens, num_boxes, jitter, im_info)
            rois, rois_jittered, rois_label, rois_target, rois_align_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_align_target = Variable(rois_align_target.view(-1, rois_align_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_jittered = copy.deepcopy(rois)
            rois_label = None
            rois_target = None
            rois_align_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0


        # Region Feature Alignment module
        ctx_rois = bbox_contextual_batch(rois)
        clip_boxes(ctx_rois[:,:,1:], im_info, batch_size)
        ctx_rois = Variable(ctx_rois)
        ctx_rois_jittered = bbox_contextual_batch(rois_jittered)
        clip_boxes(ctx_rois_jittered[:,:,1:], im_info, batch_size)
        ctx_rois_jittered = Variable(ctx_rois_jittered)

        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(ctx_rois.view(-1, 5), conv5_c.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_c = self.RCNN_roi_crop(conv5_c, Variable(grid_yx).detach())
            grid_xy = _affine_grid_gen(ctx_rois_jittered.view(-1, 5), conv5_t.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_t = self.RCNN_roi_crop(conv5_t, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_c = F.max_pool2d(pooled_feat_c, 2, 2)
                pooled_feat_t = F.max_pool2d(pooled_feat_t, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_c = self.RCNN_roi_align(conv5_c, ctx_rois.view(-1, 5))    
            pooled_feat_t = self.RCNN_roi_align(conv5_t, ctx_rois_jittered.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_c = self.RCNN_roi_pool(conv5_c, ctx_rois.view(-1,5))
            pooled_feat_t = self.RCNN_roi_pool(conv5_t, ctx_rois_jittered.view(-1,5))
        
        pooled_feat_res = pooled_feat_t - pooled_feat_c

        # feed pooled features to top model
        pooled_feat_res = self._head_to_tail_align(pooled_feat_res)
        bbox_align_pred = self.RCNN_bbox_align_pred(pooled_feat_res)

        RCNN_loss_bbox_align = 0
        
        # Apply bounding-box regression deltas
        box_deltas = bbox_align_pred.data
        box_deltas_zeros = torch.zeros(box_deltas.shape).cuda()
        box_deltas = torch.cat((box_deltas, box_deltas_zeros), 1)


        # Optionally normalize targets by a precomputed mean and stdev
        # The roi alignment process is class_agnostic
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(batch_size, -1, 4)
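        # (editor's note) the regression deltas are un-normalized here
        # (delta * std + mean), undoing the target normalization presumably
        # applied inside RCNN_proposal_target, before bbox_transform_inv
        # decodes them relative to the jittered RoIs.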

        rois_sens = rois_jittered.new(rois_jittered.size()).zero_()
        rois_sens[:,:,1:5] = bbox_transform_inv(rois_jittered[:,:,1:5], box_deltas, batch_size)

        clip_boxes(rois_sens[:,:,1:5], im_info, batch_size)
        


        rois = Variable(rois)
        rois_sens = Variable(rois_sens)

        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5), conv5_c.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_c = self.RCNN_roi_crop(conv5_c, Variable(grid_yx).detach())
            grid_xy = _affine_grid_gen(rois_sens.view(-1, 5), conv5_t.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_t = self.RCNN_roi_crop(conv5_t, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_c = F.max_pool2d(pooled_feat_c, 2, 2)
                pooled_feat_t = F.max_pool2d(pooled_feat_t, 2, 2)

        elif cfg.POOLING_MODE == 'align':
            pooled_feat_c = self.RCNN_roi_align(conv5_c, rois.view(-1, 5))
            pooled_feat_t = self.RCNN_roi_align(conv5_t, rois_sens.view(-1, 5))

        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_c = self.RCNN_roi_pool(conv5_c, rois.view(-1, 5))
            pooled_feat_t = self.RCNN_roi_pool(conv5_t, rois_sens.view(-1, 5))
                                                        
        cls_score_ref = self.confidence_ref(self.RCNN_top_ref(pooled_feat_c.view(pooled_feat_c.size(0), -1)))
        cls_score_sens = self.confidence_sens(self.RCNN_top_sens(pooled_feat_t.view(pooled_feat_t.size(0), -1)))
        cls_prob_ref = F.softmax(cls_score_ref, 1)
        cls_prob_sens = F.softmax(cls_score_sens, 1)

        confidence_ref = torch.abs(cls_prob_ref[:,1]-cls_prob_ref[:,0])
        confidence_sens = torch.abs(cls_prob_sens[:,1]-cls_prob_sens[:,0])
        confidence_ref = confidence_ref.unsqueeze(1).unsqueeze(2).unsqueeze(3)
        confidence_sens = confidence_sens.unsqueeze(1).unsqueeze(2).unsqueeze(3)
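        # (editor's note) confidence is the absolute margin between the two
        # class probabilities of each modality's (apparently binary) classifier;
        # the unsqueezes shape it to (N, 1, 1, 1) so it broadcasts over the
        # channel and spatial dimensions when re-weighting the pooled features.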

        pooled_feat_c = confidence_ref * pooled_feat_c
        pooled_feat_t = confidence_sens * pooled_feat_t
        pooled_feat = pooled_feat_c + pooled_feat_t


        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_cls_ref = 0
        RCNN_loss_cls_sens = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            RCNN_loss_cls_ref = F.cross_entropy(cls_score_ref, rois_label)
            RCNN_loss_cls_sens = F.cross_entropy(cls_score_sens, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
            RCNN_loss_bbox_align = _smooth_l1_loss(bbox_align_pred, rois_align_target[:,:2], rois_inside_ws[:,:2], rois_outside_ws[:,:2])


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, rois_sens, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_cls_ref, RCNN_loss_cls_sens, RCNN_loss_bbox, RCNN_loss_bbox_align, rois_label
Example no. 13
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed the base feature map to the RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # update 20191026: get the indices of graph nodes for the rois (default: batch_size = 1)
        # if we want to change batch_size, we should consider changing
        # roi2gt_assignment[0], roi_part_match[0], roi_part_match_overlap[0], and so on

        # part_threshold = 0.25
        #
        # # first, calculate the overlaps among rois and gt, get the max roi for each gt (node_cls)
        overlaps = bbox_overlaps_batch(rois, rois)[0]

        N_node, _ = overlaps.shape

        node_list = [i for i in range(N_node)]

        for j in range(N_node):
            for k in range(N_node):
                if overlaps[j][k] != 0:
                    overlaps[j][k] = 1
                if k == j:
                    overlaps[j][k] = 0
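        # (editor's note) the double loop above just binarizes the overlap matrix
        # and zeroes its diagonal; an equivalent vectorized form (PyTorch >= 1.2)
        # would be:
        #     overlaps = (overlaps != 0).float()
        #     overlaps.fill_diagonal_(0)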

        idx_subgraph, vertex_subgraph = subgraph_split(overlaps)

        # max_overlaps_rois2gt, roi2gt_assignment = torch.max(overlaps, 1)
        #
        # # second, calculate the overlaps among rois and rois_select,
        # # using threshold to select roi for each rois_select (node_part)
        #
        # rois_cls_tmp = rois[:, roi2gt_assignment[0], :]
        # rois_cls_num = np.argwhere(gt_boxes[:, :, 4].cpu().data.numpy()[0] != 0).shape[0]
        # rois_cls_tmp = rois_cls_tmp[:,:rois_cls_num, :]
        # rois_cls = rois_cls_tmp.new(rois_cls_tmp.size(0), rois_cls_tmp.size(1), 5).zero_()
        # rois_cls[:, :, :4] = rois_cls_tmp[:, :, 1:5]
        # rois_cls[:, :, 4] = rois_cls_tmp[:, :, 0]
        #
        # # rois_cls_idx_list is the idx related from rois_cls to rois
        # roi_cls_idx_list = roi2gt_assignment[0][:rois_cls_num]
        #
        # overlaps = bbox_overlaps_batch(rois, rois_cls)
        # max_overlaps_rois2cls, roi2cls_assignment = torch.max(overlaps, 2)
        #
        # roi_part_match_overlap = max_overlaps_rois2cls.cpu().data.numpy()
        # roi_part_match = roi2cls_assignment.cpu().data.numpy()
        #
        # # roi_part_idx_list is the idx related from rois_part to rois
        # roi_part_idx_list = []
        # roi_part_match_idx = np.unique(roi_part_match[0])
        # for roi_cls_idx in roi_part_match_idx:
        #     match_idx_tmp = np.transpose(np.argwhere(roi_part_match[0] == roi_cls_idx))[0]
        #     match_overlap_tmp = roi_part_match_overlap[0][match_idx_tmp]
        #     # use threshold to select rois_part
        #     match_idx_tmp_select = np.transpose(np.argwhere(match_overlap_tmp > part_threshold))[0]
        #     match_idx_tmp = match_idx_tmp[match_idx_tmp_select]
        #     roi_part_idx_list.append(torch.from_numpy(match_idx_tmp))

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # # update 20191027: build graph for rois based on index (default: batch_size = 1)
        # adj_jud = np.zeros((0))
        # adj_rois = torch.zeros(0).cuda().long()
        # for i in range(roi_cls_idx_list.shape[0]):
        #     adj_jud = np.concatenate((adj_jud, [1]))
        #     adj_rois = torch.cat((adj_rois, roi_cls_idx_list[i:i+1]))
        #     try:
        #         adj_jud = np.concatenate((adj_jud, np.zeros((roi_part_idx_list[i].shape[0]))))
        #         adj_rois = torch.cat((adj_rois, roi_part_idx_list[i].cuda()))
        #     except IndexError:
        #         print ('IndexError happen, continue')
        #         continue
        #
        # node_cls_idx = np.transpose(np.argwhere(adj_jud == 1))[0]
        #
        # adj_matrix_bin = np.zeros((len(adj_jud), len(adj_jud)))
        #
        # # link edges for node_cls to node_cls
        # for k in range(len(node_cls_idx)-1):
        #     idx_node_cls_1 = node_cls_idx[k]
        #     idx_node_cls_2 = node_cls_idx[k + 1]
        #     adj_matrix_bin[idx_node_cls_1, idx_node_cls_2] = 1
        #     adj_matrix_bin[idx_node_cls_2, idx_node_cls_1] = 1
        #
        # # link edges for node_cls to related node_part
        # for k in range(len(node_cls_idx)-1):
        #     idx_start = node_cls_idx[k]
        #     idx_end = node_cls_idx[k + 1]
        #     for s in range(idx_start, idx_end):
        #         for t in range(idx_start, idx_end):
        #             if s == t:
        #                 adj_matrix_bin[s, t] = 0
        #             else:
        #                 adj_matrix_bin[s, t] = 1

        # # calculate the adj_mat based on adj_matrix_bin, the weights on edges are the cosine distance between nodes
        # adj_matrix = np.zeros((len(adj_jud), len(adj_jud)))
        #
        # cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
        #
        # for s in range(len(adj_jud)):
        #     for t in range(len(adj_jud)):
        #         if adj_matrix_bin[s, t] == 1:
        #             node_feat_s = pooled_feat[adj_rois[s], :]
        #             node_feat_t = pooled_feat[adj_rois[t], :]
        #             adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
        #         else:
        #             adj_matrix[s, t] = 0
        #
        # adj_matrix = torch.from_numpy(adj_matrix).float().cuda()
        #
        # pooled_feat[adj_rois, :] = F.relu(self.gcn1(pooled_feat[adj_rois, :], adj_matrix))
        # pooled_feat[adj_rois, :] = F.relu(self.gcn2(pooled_feat[adj_rois, :], adj_matrix))

        # adj_jud = np.zeros((N_node, N_node))
        adj_matrix = np.zeros((N_node, N_node))
        #
        # for k in range(idx_subgraph):
        #     idx_k = np.transpose(np.argwhere(vertex_subgraph == k))[0]
        #     for s in range(idx_k.shape[0]):
        #         for t in range(idx_k.shape[0]):
        #             if s == t:
        #                 adj_jud[s, t] = 0
        #             else:
        #                 adj_jud[s, t] = 1
        #
        cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

        for s in range(N_node):
            for t in range(N_node):
                #if adj_jud[s,t] != 0:
                if s != t:
                    node_feat_s = pooled_feat[s, :]
                    node_feat_t = pooled_feat[t, :]
                    adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
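        # (editor's note) this O(N^2) Python loop fills pairwise cosine
        # similarities one entry at a time; a vectorized equivalent would be:
        #     feat_n = F.normalize(pooled_feat, dim=1)
        #     adj = feat_n @ feat_n.t()
        #     adj.fill_diagonal_(0)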

        adj_matrix = torch.from_numpy(adj_matrix).float().cuda()

        pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
        pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # update 2019-6-17: fix the bug for dimension specified as 0...
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
Example no. 14
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed the base feature map to the RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        # ========= Union Box ==========
        whole_box = union_box_layer(rois, im_info)
        whole_box = whole_box.reshape(whole_box.shape[0], 1, 5)
        whole = torch.from_numpy(whole_box)
        whole = whole.type(torch.cuda.FloatTensor)
        # whole = whole_box.view([-1, 5])

        # edges = edge_box_layer(rois, im_info)
        # edges = torch.from_numpy(edges)
        # edge = edges.view([-1, 12])

        edges_all = edge_whole_layer(rois, im_info)
        edges_all = torch.from_numpy(edges_all)

        # whole_rois = torch.cat((whole, rois), 1)

        rois = Variable(rois)

        # print rois.size()
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
            whole_pool_feat = self.RCNN_roi_align_whole(
                base_feat, whole.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
            whole_pool_feat = self.RCNN_roi_pool(base_feat, whole.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        whole_pool_feat = self._head_to_tail(whole_pool_feat)

        ##########structure_inference_spmm#################

        # pooled_feat = structure_inference_spmm(pooled_feat , whole_pool_feat, edges, rois.size()[1])
        pooled_feat = self.Structure_inference(edges_all, pooled_feat,
                                               whole_pool_feat,
                                               rois.size()[1])
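        # (editor's note) Structure Inference Net-style refinement: RoI features
        # are updated from the whole-image ("scene") feature and the pairwise
        # edge features before the classification and regression heads.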

        # print 'pooled_feat.shape:   ',  pooled_feat.shape
        # print 'rois.shape:   ', rois.shape
        # print 'edges.shape: ', edges.shape

        #coordinate = self.coor_fc( rois[:,:,1:].reshape(rois.shape[1], 4) )
        #pooled_feat = torch.cat(( coordinate ,pooled_feat),1)
        #pooled_feat = torch.add(coordinate, pooled_feat)

        # #########  external_dim ###########
        #
        # external_feature = rois[:,:,3:].view([128,2])
        # pooled_feat = self.External(pooled_feat,external_feature)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
Example no. 15
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                target=False,
                eta=1.0):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        if self.context:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return domain_p  #, diff
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return domain_p  #,diff
        # feed the base feature map to the RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        if self.context:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)
        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, domain_p  #,diff
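    # (editor's note) grad_reverse above is a gradient reversal layer (Ganin &
    # Lempitsky): identity in the forward pass, gradient scaled by -lambd in
    # the backward pass, which trains the feature extractor adversarially
    # against the domain classifier netD. A minimal sketch with the modern
    # torch.autograd.Function API:
    #
    #     class GradReverse(torch.autograd.Function):
    #         @staticmethod
    #         def forward(ctx, x, lambd):
    #             ctx.lambd = lambd
    #             return x.view_as(x)
    #
    #         @staticmethod
    #         def backward(ctx, grad_output):
    #             return grad_output.neg() * ctx.lambd, None
    #
    #     def grad_reverse(x, lambd=1.0):
    #         return GradReverse.apply(x, lambd)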
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                target=False,
                eta=1.0):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # build a multi-hot vector of the classes present in the image
        if self.training and target:
            cls_label_ind = torch.unique(gt_boxes[:, :, 4].cpu())
            cls_label = torch.zeros(self.n_classes)
            cls_label[cls_label_ind.long()] = 1
            # assume the background category is always present
            cls_label[0] = 1
            cls_label = cls_label.cuda()
            cls_label.requires_grad = False

        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        if self.lc:
            d_pixel, _ = self.netD_pixel_1(grad_reverse(base_feat1, lambd=eta))
            # print(d_pixel)
            if not target:
                _, feat_pixel = self.netD_pixel_1(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel_1(grad_reverse(base_feat1, lambd=eta))

        base_feat2 = self.RCNN_base2(base_feat1)
        if self.lc:
            d_pixel_2, _ = self.netD_pixel_2(
                grad_reverse(base_feat2, lambd=eta))
        else:
            d_pixel_2 = self.netD_pixel_2(grad_reverse(base_feat2, lambd=eta))

        base_feat3 = self.RCNN_base3(base_feat2)
        if self.lc:
            d_pixel_3, _ = self.netD_pixel_3(
                grad_reverse(base_feat3, lambd=eta))
        else:
            d_pixel_3 = self.netD_pixel_3(grad_reverse(base_feat3, lambd=eta))
            # print(d_pixel_3.mean())

        base_feat4 = self.RCNN_base4(base_feat3)
        if self.gc:
            d_pixel_4, _ = self.netD_1(grad_reverse(base_feat4, lambd=eta))
        else:
            d_pixel_4 = self.netD_1(grad_reverse(base_feat4, lambd=eta))

        # something wrong
        base_feat = self.RCNN_base5(base_feat4)
        # for target domain training, we need to return the d_pixel, domain_p
        if self.gc:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training and not target:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        #feat_pixel = torch.zeros(feat_pixel.size()).cuda()
        if self.lc:
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
        if self.gc:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic and not target:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        # weakly-supervised image-level score aggregation (branch disabled)
        if False:
            #cls_prob_sum = torch.sum(cls_prob, 0)
            # x = max(1, x)
            #cls_prob_sum = cls_prob_sum.repeat(2, 1)
            #cls_prob_sum = torch.min(cls_prob_sum, 0)[0]
            max_roi_cls_prob = torch.max(cls_prob, 0)[0]
            #assert (max_roi_cls_prob.data.cpu().numpy().all() >= 0. and max_roi_cls_prob.data.cpu().numpy().all() <= 1.)
            if not (max_roi_cls_prob.data.cpu().numpy().all() >= 0.
                    and max_roi_cls_prob.data.cpu().numpy().all() <= 1.):
                pdb.set_trace()
            if not (cls_label.data.cpu().numpy().all() >= 0.
                    and cls_label.data.cpu().numpy().all() <= 1.):
                pdb.set_trace()
            BCE_loss = F.binary_cross_entropy(max_roi_cls_prob, cls_label)
            return d_pixel, domain_p, BCE_loss

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        # for weakly-supervised detection, aggregate the cls_score and compute the loss

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p  # ,diff
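
grad_reverse is called throughout this example but never defined in this listing. A common gradient reversal layer (GRL) implementation, given here as a sketch rather than the exact code this repository uses, is an identity in the forward pass that flips and scales the gradient on the way back, so the backbone learns domain-confusing features while the discriminators (netD_pixel_*, netD_*) learn to separate domains:

import torch

class GradReverse(torch.autograd.Function):
    """Identity forward; multiplies the gradient by -lambd on backward."""

    @staticmethod
    def forward(ctx, x, lambd):
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # reversed (and scaled) gradient flows into the feature extractor
        return grad_output.neg() * ctx.lambd, None

def grad_reverse(x, lambd=1.0):
    return GradReverse.apply(x, lambd)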
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)

            #rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois, rois_label, rois_scale_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            # EDIT
            rois_scale_label = Variable(rois_scale_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            # EDIT
            rois_scale_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois
        #print(rois)
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 6), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 6))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 6))

        #print(pooled_feat.shape)
        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        if self.training:
            fg_inds = torch.squeeze((rois_scale_label >= 0).nonzero(), 1)
            #print((rois_scale_label >= 0).nonzero().shape)
            # compute object scale classification probability
            # EDIT
            scale_score = self.RCNN_scale_score(pooled_feat)
            scale_score = scale_score[fg_inds]
            # EDIT
            rois_scale_label = rois_scale_label[fg_inds]
        #print(rois_scale_label)
        #print(scale_score)
        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0
        RCNN_loss_scale = 0
        RCNN_loss_scale_adv = 0
        RCNN_acc_scale = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

            # scale classification loss
            # EDIT
            #print(pooled_feat)
            #print(scale_score)
            #print(rois_scale_label)
            RCNN_loss_scale = F.cross_entropy(scale_score, rois_scale_label)

            #RCNN_loss_scale_adv = torch.sum(F.softmax(scale_score, dim=1) * F.log_softmax( torch.clamp(logsoftmax(scale_score), min=1e-10, max=1.0), dim=1))
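            # The term below is the mean negative entropy of the scale
            # posteriors: sum_k p_k * log(p_k) is <= 0, so minimizing this
            # loss pushes the scale predictions toward the uniform
            # distribution (maximum entropy), i.e. it adversarially confuses
            # the scale classifier; the clamp guards log() against zeros.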
            softmax = nn.Softmax(dim=1)
            RCNN_loss_scale_adv = torch.mean(
                torch.sum(softmax(scale_score) * torch.log(torch.clamp(softmax(scale_score), min=1e-10, max=1.0)), 1))
            correct = scale_score.max(1)[1].type_as(rois_scale_label).eq(rois_scale_label)
            if not hasattr(correct, 'sum'):
                correct = correct.cpu()
            correct = correct.sum().type(torch.FloatTensor).cuda()
            RCNN_acc_scale = correct / scale_score.size(0)


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # wrap each scalar loss in a leading batch dim so nn.DataParallel can
        # gather them; see https://github.com/jwyang/faster-rcnn.pytorch/issues/226
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)
            RCNN_loss_scale = torch.unsqueeze(RCNN_loss_scale, 0)
            RCNN_loss_scale_adv = torch.unsqueeze(RCNN_loss_scale_adv, 0)
            RCNN_acc_scale = torch.unsqueeze(RCNN_acc_scale, 0)

            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, \
                   RCNN_loss_scale, RCNN_loss_scale_adv, RCNN_acc_scale, \
                   rois_label

        return rois, cls_prob, bbox_pred, RCNN_acc_scale
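
The unsqueeze block above exists because nn.DataParallel gathers replica outputs along dim 0, which requires each output to have at least one dimension; wrapping every scalar loss as a shape-[1] tensor makes the gathered result a [num_gpus] tensor the training loop can reduce. A tiny sketch of the shape logic:

import torch

loss = torch.tensor(0.5)         # 0-dim scalar, as produced by one replica
loss = torch.unsqueeze(loss, 0)  # shape [1]; gather across GPUs -> [num_gpus]
total = loss.mean()              # reduce over replicas in the training loop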
Example n. 18
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
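
Throughout these examples the ROIs handed to the pooling layers are flattened to shape [K, 5]: column 0 is the batch index and columns 1-4 are (x1, y1, x2, y2) in input-image coordinates. RCNN_roi_align here is a custom layer bundled with the repository; as a point of reference (an assumption, not the layer used above), torchvision ships an operator with the same semantics:

import torch
from torchvision.ops import roi_align

base_feat = torch.randn(1, 512, 38, 50)           # [N, C, H, W] feature map
rois = torch.tensor([[0., 10., 10., 100., 80.]])  # [batch_idx, x1, y1, x2, y2]

# spatial_scale maps image coordinates onto the feature map
# (1/16 for a VGG16 conv5 backbone, for example)
pooled = roi_align(base_feat, rois, output_size=(7, 7), spatial_scale=1.0 / 16)
print(pooled.shape)  # torch.Size([1, 512, 7, 7])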
Example n. 19
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)
        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data
        # print('--faster rcnn -- ,im_data = {},im_info = {},gt_boxes = {},num_boxes = {}'.format(im_data.size(), im_info,
        #                                                                                         gt_boxes, num_boxes))

        # feed image data to base model to obtain base feature map
        base_feat_times = self.RCNN_base(im_data)  # [1, 832, L/4, 7, 7]
        # print('base_feat_times size = {}'.format(base_feat_times.size()))

        if self.base_feature_mean:
            base_feat_key = torch.mean(base_feat_times, 2)
        else:
            base_feat_time_len = base_feat_times.size(2)
            base_feat_key_time = base_feat_time_len // 2
            base_feat_key = base_feat_times[:, :, base_feat_key_time, :, :]
        # base_feat_key_time = 4,base_fear_middle size = torch.Size([1, 832, 7, 7])
        # print('base_feat_times = {} ,base_fear_middle size = {}'.format(base_feat_times.size(),base_feat_key.size()))

        # feed base feature map to RPN to obtain rois; the rois have already been refined once in the proposal layer
        # print('im_info = {},gt_boxes = {},num_boxes = {} '.format(im_info,gt_boxes, num_boxes))
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat_key, im_info, gt_boxes, num_boxes)
        # rois size = test:([1, 300, 5]) , train:[b, 2000, 5]
        # rois[0,0,:] =[   0.0000,  190.2723,   39.9991,  208.7142,  102.8349] ,[   0.0000,  222.2723,  167.9991,  240.7142,  230.8349]

        #   print('rpn_loss_cls = {}, rpn_loss_cls = {}, rois size = {},rois ={}'.format(rpn_loss_cls,rpn_loss_cls, rois.size(),rois[0,1000,:]))

        # the rois need to be replicated along the time dimension

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
            #train: rois = ([b, 128, 5]), rois_label = [b*128], rois_target size = torch.Size([b*128, 4])
            #print('---RCNN_proposal_target----,rois = {}, rois_label = {}, rois_target size = {},rois_outside_ws ={}'
            #       .format(rois.size(), rois_label.size(), rois_target.size(), rois_outside_ws.size))

        else:

            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
        rois = Variable(
            rois
        )  #[b,max_num,(label,w,h,x,y)],  test: ([b, 300, 5]  train: ([b, 128, 5])

        #recycle roi pooling

        # roi_pooled_fs = []
        # for i in range(base_feat_time_len):
        #
        #     pooled_feat = self.RCNN_roi_pool(base_feat_times[:,:,i,:,:], rois.view(-1, 5))
        #     # print('pooled_feat size = {}'.format(pooled_feat.size()))
        #     torch.c
        #     roi_pooled_fs.append(pooled_feat)
        #
        # print('roi_pooled_fs size = {}'.format(len(roi_pooled_fs)))
        if self.is_pool:
            pooled_feat_0 = self.RCNN_roi_pool(
                base_feat_times[:, :, 0, :, :], rois.view(-1, 5)
            )  # [b*num,c,w,h] = test:([300, 832, 7, 7]) ,train:  [b*128, 832, 7, 7]
            pooled_feat_0 = torch.unsqueeze(pooled_feat_0, 2)
            pooled_feat_1 = torch.unsqueeze(
                self.RCNN_roi_pool(base_feat_times[:, :, 1, :, :],
                                   rois.view(-1, 5)), 2)
            pooled_feat_2 = torch.unsqueeze(
                self.RCNN_roi_pool(base_feat_times[:, :, 2, :, :],
                                   rois.view(-1, 5)), 2)
            pooled_feat_3 = torch.unsqueeze(
                self.RCNN_roi_pool(base_feat_times[:, :, 3, :, :],
                                   rois.view(-1, 5)), 2)
            # pooled_feat_4 = torch.unsqueeze(self.RCNN_roi_pool(base_feat_times[:, :, 4, :, :], rois.view(-1, 5)),2)
            # pooled_feat_5 = torch.unsqueeze(self.RCNN_roi_pool(base_feat_times[:, :, 5, :, :], rois.view(-1, 5)),2)
            # pooled_feat_6 = torch.unsqueeze(self.RCNN_roi_pool(base_feat_times[:, :, 6, :, :], rois.view(-1, 5)),2)
            # pooled_feat_7 = torch.unsqueeze(self.RCNN_roi_pool(base_feat_times[:, :, 7, :, :], rois.view(-1, 5)),2)
            # print('pooled_feat7 size = {},pooled_feat0 size = {}'.format(pooled_feat_7.size(),pooled_feat_0.size()))
        else:
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat_key.size()[2:],
                                       self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()

            pooled_feat_0 = F.max_pool2d(
                self.RCNN_roi_crop(base_feat_times[:, :, 0, :, :],
                                   Variable(grid_yx).detach()), 2, 2)
            pooled_feat_0 = torch.unsqueeze(pooled_feat_0, 2)

            pooled_feat_1 = F.max_pool2d(
                self.RCNN_roi_crop(base_feat_times[:, :, 1, :, :],
                                   Variable(grid_yx).detach()), 2, 2)
            pooled_feat_1 = torch.unsqueeze(pooled_feat_1, 2)

            pooled_feat_2 = F.max_pool2d(
                self.RCNN_roi_crop(base_feat_times[:, :, 2, :, :],
                                   Variable(grid_yx).detach()), 2, 2)
            pooled_feat_2 = torch.unsqueeze(pooled_feat_2, 2)

            pooled_feat_3 = F.max_pool2d(
                self.RCNN_roi_crop(base_feat_times[:, :, 3, :, :],
                                   Variable(grid_yx).detach()), 2, 2)
            pooled_feat_3 = torch.unsqueeze(pooled_feat_3, 2)

        # test:([b*300, 832,4, 7, 7]) ,train:  [b*128, 832, 4,7, 7]
        pooled_feat_cat = torch.cat(
            [pooled_feat_0, pooled_feat_1, pooled_feat_2, pooled_feat_3],
            2)  #,pooled_feat_4,pooled_feat_5,pooled_feat_6,pooled_feat_7],2)
        #print('pooled_feat0 size = {} , pooled_feat_cat size = {}'.format(pooled_feat_0.size(),pooled_feat_cat.size()))

        #  test: ([b * 300, 1024]),train:[b*128,1024]
        pooled_feat = self._head_to_tail(pooled_feat_cat)
        # print('after top pooled_feat size = {}'.format(pooled_feat.size()))

        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        # print('bbox_pred size = {}'.format(bbox_pred.size()))
        cls_score = self.RCNN_cls_score(pooled_feat)
        # print('cls_score size = {}'.format(cls_score.size()))
        cls_prob = F.softmax(cls_score, 1)
        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0
        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        # print('cls_score = {}, bbox_pred = {}'.format(cls_prob,bbox_pred))

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
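
The four unrolled pooling calls above (and the commented-out ones for longer clips) all pool the same ROIs from successive temporal slices and re-assemble a time axis. The pattern generalizes to any clip length with a loop; a standalone sketch using torchvision's roi_pool under the same shape assumptions:

import torch
from torchvision.ops import roi_pool

base_feat_times = torch.randn(1, 832, 4, 7, 7)  # [B, C, T, H, W]
rois = torch.tensor([[0., 0., 0., 6., 6.]])     # [batch_idx, x1, y1, x2, y2]

pooled_per_t = [
    roi_pool(base_feat_times[:, :, t], rois, output_size=(7, 7))
    for t in range(base_feat_times.size(2))
]
pooled_feat_cat = torch.stack(pooled_per_t, dim=2)  # [K, C, T, 7, 7]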
Example n. 20
    def forward(self, im_data, im_info, gt_boxes, num_boxes, t_rois=None):
        batch_size = im_data.size(0)

        base_feat = self.RCNN_base(im_data)

        if t_rois is None:
            rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
                base_feat, im_info, gt_boxes, num_boxes)
        else:
            rois = t_rois

        # if it is training phase, then use ground truth bboxes for refining
        if self.training and t_rois is None:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat_post = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat_post)
        cls_score = self.RCNN_cls_score(pooled_feat_post)
        cls_prob = F.softmax(cls_score, 1)

        if t_rois is not None:
            return pooled_feat_post, cls_prob

        if self.training and not self.class_agnostic and t_rois is None:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        if self.training:
            return rois_label, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox
        else:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
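
This variant doubles as a feature extractor: when t_rois is supplied, the RPN and the loss heads are skipped and the function returns (pooled_feat_post, cls_prob) for the given boxes. A hypothetical usage sketch; model and the input tensors are placeholders for objects built elsewhere:

import torch

model.eval()  # placeholder: an instance of the network defined above
with torch.no_grad():
    # one precomputed box per image: [batch_idx, x1, y1, x2, y2]
    t_rois = torch.tensor([[[0., 50., 60., 200., 180.]]]).cuda()
    feats, probs = model(im_data, im_info, gt_boxes, num_boxes, t_rois=t_rois)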
Example n. 21
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                domain=None,
                l=0,
                loss_start=False):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes,
                                                 domain, self.transfer)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws, domain_label = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        #-----------------------transfer learning----------------------------#
        #print(domain)
        dom_loss = 0

        # baseline: transfer == False
        if self.training and self.transfer:
            if self.grl:
                dom_input = ReverseLayerF.apply(pooled_feat, l)
            else:
                dom_input = pooled_feat

            dom_pred = self._domain_classify(dom_input)
            domain_label = Variable(domain_label.cpu().cuda().view(-1).long())

            ############Process Transfer Loss Weight#########
            if loss_start:
                p_target = F.softmax(dom_pred * self.transfer_gamma, dim=1)[:, 0]
                domain_label.data = domain_label.data.type(
                    torch.FloatTensor).cuda()
                l_target = domain_label

                self.weight = p_target**l_target
            ###############################################

            ##############DOMAIN LOSS SELECTION##########

            else:
                ids = torch.LongTensor(1).cuda()

                # random select
                if self.transfer_select == 'RANDOM':
                    perm = torch.randperm(rois.size(1))
                    ids = perm[:rois.size(1) // 8].cuda()

                # select positive samples and predicted positive samples
                elif self.transfer_select == 'CONDITION':
                    ids = torch.arange(rois.size(1) // 8)
                    ids = torch.Tensor.long(ids).cuda()

                # select all positive samples
                elif self.transfer_select == 'POSITIVE':
                    ids = torch.nonzero(rois_label.data)
                    ids = torch.squeeze(ids).cuda()

                # select balanced positive and negative samples
                elif self.transfer_select == 'BALANCE':
                    ids_p = torch.nonzero(rois_label.data)
                    ids_p = torch.squeeze(ids_p).cuda()

                    ids_n = (rois_label.data == 0).nonzero()
                    ids_n = torch.squeeze(ids_n).cuda()
                    ids_n = ids_n[:ids_p.size(0)]

                    ids = torch.cat((ids_p, ids_n), 0).cuda()

                # select all sample
                if self.transfer_select == 'ALL':
                    dom_pred_loss = dom_pred
                    dom_label_loss = domain_label
                else:
                    dom_pred_loss = dom_pred[ids]
                    dom_label_loss = domain_label[ids]

                ##########DOMAIN LOSS SELECTION DONE##########

                dom_loss = F.cross_entropy(dom_pred_loss, dom_label_loss)

                dom_loss = dom_loss * (
                    self.transfer_weight.expand_as(dom_loss))
        #---------------------transfer learning done-------------------------#

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            if self.transfer and loss_start:
                rois_label_loss = torch.eye(
                    self.n_classes)[rois_label.data.cpu()].type(
                        torch.FloatTensor)
                rois_label_loss = Variable(rois_label_loss.cuda())
                weight_loss_cls = self.weight.view(rois_label.size(0),
                                                   1).repeat(
                                                       1, self.n_classes)

                RCNN_loss_cls = F.binary_cross_entropy_with_logits(
                    cls_score, rois_label_loss, weight_loss_cls)

                # bounding box regression L1 loss
                RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                 rois_inside_ws,
                                                 rois_outside_ws, True, True,
                                                 self.weight)

            else:
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

                # bounding box regression L1 loss
                RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                 rois_inside_ws,
                                                 rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, dom_loss
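
In the BALANCE branch above, the domain loss is computed on an index set that pairs every foreground ROI with one background ROI, so neither class dominates the discriminator update. A standalone sketch of that selection:

import torch

rois_label = torch.tensor([3, 0, 0, 7, 0, 1, 0, 0])  # 0 = background

ids_p = torch.nonzero(rois_label).squeeze(1)       # foreground indices
ids_n = torch.nonzero(rois_label == 0).squeeze(1)  # background indices
ids_n = ids_n[:ids_p.size(0)]                      # keep as many as positives

ids = torch.cat((ids_p, ids_n), 0)                 # balanced index set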
Example n. 22
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data
        self.batch_size = im_data.size(0)

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map tp RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is training phrase, then use ground trubut bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # Base feature
        base_feat = self.RCNN_conv_new(base_feat)

        # Local feature with PS-ROIPooling
        # Local classification
        local_cls_feat = self.RCNN_local_cls_base(base_feat)
        local_cls_feat = self.RCNN_psroi_pool_cls(local_cls_feat,
                                                  rois.view(-1, 5))
        local_cls = self.avg_pooling(local_cls_feat)
        local_cls = self.RCNN_local_cls_fc(local_cls)

        # Local bbox regression
        local_bbox_feat = self.RCNN_local_bbox_base(base_feat)
        local_bbox_feat = self.RCNN_psroi_pool_loc(local_bbox_feat,
                                                   rois.view(-1, 5))
        local_bbox = self.avg_pooling(local_bbox_feat)

        # Global feature with ROIPooling
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        global_base = self.RCNN_global_base(pooled_feat)
        global_cls = self.RCNN_global_cls(global_base)
        global_bbox = self.RCNN_global_bbox(global_base)

        # fusion global feature and local feature
        cls_score = (local_cls + global_cls).squeeze()
        bbox_pred = (local_bbox + global_bbox).squeeze()

        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        cls_prob = F.softmax(cls_score, dim=1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            loss_func = self.ohem_detect_loss if cfg.TRAIN.OHEM else self.detect_loss
            RCNN_loss_cls, RCNN_loss_bbox = loss_func(cls_score, rois_label,
                                                      bbox_pred, rois_target,
                                                      rois_inside_ws,
                                                      rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
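
When cfg.TRAIN.OHEM is set, the losses come from self.ohem_detect_loss, which is not shown in this listing. Online hard example mining typically computes an unreduced per-ROI loss, keeps the top-k hardest ROIs, and averages over those; a generic sketch of the classification half (an illustration, not necessarily this repository's implementation):

import torch
import torch.nn.functional as F

def ohem_cross_entropy(cls_score, rois_label, keep_num=128):
    # per-ROI losses, no reduction
    per_roi_loss = F.cross_entropy(cls_score, rois_label, reduction='none')
    keep_num = min(keep_num, per_roi_loss.numel())
    # keep only the hardest ROIs and average over them
    topk_loss, _ = torch.topk(per_roi_loss, keep_num)
    return topk_loss.mean()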
Example n. 23
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                return_feats=False,
                oracle_rois=None):
        """

        :param im_data:
        :param im_info:
        :param gt_boxes:
        :param num_boxes:
        :param return_feats:
        :param oracle_rois: Use GT ROIs for feature extraction (NOT SUPPORTED DURING TRAINING!!!)
        :return:
        """
        if self.training and oracle_rois is not None:
            raise NotImplementedError(
                "We do not support using oracle ROIs during training phase.")
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)
        if not self.printed:
            print("base_feat: {}".format(base_feat.shape))

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)
        if not self.printed:
            print("type of rois: {}".format(type(rois)))
            print("rois: {}".format(rois.shape))  # 1 X num objects X 5

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        if oracle_rois is not None:
            rois = torch.from_numpy(oracle_rois).float()
            rois = torch.unsqueeze(rois, dim=0)

        if not self.printed:
            print("rois.Variable.shape: {}".format(rois.shape))
            print("rois: {}".format(rois))

        rois = Variable(rois).cuda()

        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        if not self.printed:
            print("pooled_feat.shape: {}".format(pooled_feat.shape))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)

        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        self.printed = True

        if not return_feats:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
        else:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
                   pooled_feat
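
At inference this variant can also score externally supplied boxes: oracle_rois is expected as a NumPy array of [batch_idx, x1, y1, x2, y2] rows (it is explicitly rejected during training), and return_feats=True appends the pooled per-ROI features to the usual outputs. A hypothetical call; model and the input tensors are placeholders:

import numpy as np

model.eval()  # placeholder: an instance of the network defined above
oracle = np.array([[0, 40, 30, 220, 190]], dtype=np.float32)
outputs = model(im_data, im_info, gt_boxes, num_boxes,
                return_feats=True, oracle_rois=oracle)
pooled_feat = outputs[-1]  # appended when return_feats=True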
Example n. 24
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
            RCNN_loss_cls, RCNN_loss_bbox, rois_label \
            = self.FRCN(im_data, im_info, gt_boxes, num_boxes)

        # get global and local region from Faster R-CNN

        base_feat = self.FRCN.RCNN_base(im_data)

        #print(rois.data.cpu().numpy())
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        box_deltas = self.FRCN._bbox_pred.data

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if self.class_agnostic:
                if self.use_cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda(
                        ) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if self.use_cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda(
                        ) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        # get global region
        thresh = 0.01

        region_g = np.ndarray((0, 5))
        region_l = np.ndarray((0, 5))
        for j in range(1, 4):
            inds = torch.nonzero(scores[:, j] >= thresh).view(-1)
            inds_l = torch.nonzero(scores[:, j + 3] >= thresh).view(-1)
            #print(inds)
            if inds.numel() > 0 and inds_l.numel() > 0:
                cls_scores = scores[:, j][inds]
                cls_scores_l = scores[:, j + 3][inds_l]
                #print(cls_scores)
                #print(cls_scores_l)
                _, order = torch.sort(cls_scores, 0, True)
                _, order_l = torch.sort(cls_scores_l, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds]
                    cls_boxes_l = pred_boxes[inds_l]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_boxes_l = pred_boxes[inds_l][:,
                                                     (j + 3) * 4:(j + 4) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets_l = torch.cat(
                    (cls_boxes_l, cls_scores_l.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                cls_dets_l = cls_dets_l[order]

                region_g = np.vstack((region_g, cls_dets))
                region_l = np.vstack((region_l, cls_dets_l))
                """
                keep = nms(cls_dets, 0.9, force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                keep = nms(cls_dets_l, 0.9, force_cpu=not cfg.USE_GPU_NMS)
                cls_dets_l = cls_dets_l[keep.view(-1).long()]

                cls_dets = cls_dets[order]
                cls_dets_l = cls_dets_l[order_l]

                sort_ind = np.argsort(cls_dets[...,-1])
                high_ind = sort_ind[-self.minibatch//2:]
                low_ind = sort_ind[:self.minibatch//2]
                region_g = np.vstack((region_g, cls_dets[high_ind]))
                region_g = np.vstack((region_g, cls_dets[low_ind]))

                sort_ind = np.argsort(cls_dets_l[..., -1])
                high_ind = sort_ind[-self.minibatch//2:]
                low_ind = sort_ind[:self.minibatch//2]
                region_l = np.vstack((region_l, cls_dets_l[high_ind]))
                region_l = np.vstack((region_l, cls_dets_l[low_ind]))
                """
                #region_g = np.vstack((region_g, cls_dets[np.argmax(cls_dets[..., -1])]))
                #region_l = np.vstack((region_l, cls_dets_l[np.argmax(cls_dets_l[..., -1])]))

        if not self.training:
            self.minibatch = 1

        if self.training:
            keep = nms(torch.tensor(region_g).cuda(),
                       0.9,
                       force_cpu=not cfg.USE_GPU_NMS)
            if type(keep) is not list:
                keep = keep.view(-1).long()
            region_g = region_g[keep]
            sort_ind = np.argsort(region_g[..., -1])
            high_ind_g = sort_ind[-self.minibatch // 2:]
            low_ind_g = sort_ind[:self.minibatch // 2]

            keep = nms(torch.tensor(region_l).cuda(),
                       0.9,
                       force_cpu=not cfg.USE_GPU_NMS)
            if type(keep) is not list:
                keep = keep.view(-1).long()
            region_l = region_l[keep]
            sort_ind = np.argsort(region_l[..., -1])
            high_ind_l = sort_ind[-self.minibatch // 2:]
            low_ind_l = sort_ind[:self.minibatch // 2]

            high_num = min(len(high_ind_g), len(high_ind_l))
            high_ind_g = high_ind_g[:high_num]
            high_ind_l = high_ind_l[:high_num]

            low_num = min(len(low_ind_g), len(low_ind_l))
            low_ind_g = low_ind_g[:low_num]
            low_ind_l = low_ind_l[:low_num]

            proposal_g = np.vstack((region_g[high_ind_g], region_g[low_ind_g]))
            proposal_l = np.vstack((region_l[high_ind_l], region_l[low_ind_l]))

            #self.proposal_g.data.resize_(proposal_g.size()).copy_(proposal_g)
            #self.proposal_l.data.resize_(proposal_l.size()).copy_(proposal_l)

            gt_boxes = gt_boxes.cpu().numpy()[0, :2]

            gt_g = gt_boxes[np.where(gt_boxes[..., -1] < 4)[0]]
            gt_l = gt_boxes[np.where(gt_boxes[..., -1] >= 4)[0]]

            # compare proposals against the paired ground-truth boxes
            def compute_iou(ps, gt, th=0.5):
                iou_x1 = np.maximum(ps[..., 0], gt[0])
                iou_y1 = np.maximum(ps[..., 1], gt[1])
                iou_x2 = np.minimum(ps[..., 2], gt[2])
                iou_y2 = np.minimum(ps[..., 3], gt[3])
                iou_w = np.maximum(iou_x2 - iou_x1, 0)
                iou_h = np.maximum(iou_y2 - iou_y1, 0)
                iou_area = iou_w * iou_h
                gt_area = (gt[2] - gt[0]) * (gt[3] - gt[1])
                p_area = (ps[..., 2] - ps[..., 0]) * (ps[..., 3] - ps[..., 1])
                overlap = iou_area / (gt_area + p_area - iou_area)
                count = np.zeros((ps.shape[0]), dtype=int)
                count[overlap >= self.gt_iou] += 1
                return count

            cou = compute_iou(proposal_g, gt_g[0]) + compute_iou(
                proposal_l, gt_l[0])

            ## 2019.2.13
            #glcc_gt = np.zeros((proposal_g.shape[0]), dtype=int)
            #glcc_gt[cou==2] = gt_g[0,-1]
            glcc_gt = np.array([gt_g[0, -1]], dtype=int)
            glcc_gt = torch.tensor(glcc_gt, dtype=torch.long).cuda()
            self.glcc_gt.data.resize_(glcc_gt.size()).copy_(glcc_gt)

        else:
            # test phase
            proposal_g = region_g[np.argmax(region_g[..., -1])][None, ...]
            proposal_l = region_l[np.argmax(region_l[..., -1])][None, ...]
            #self.proposal_g.data.resize_(proposal_g.size()).copy_(proposal_g.size())
            #self.proposal_l.data.resize_(proposal_l.size()).copy_(proposal_l.size())

        # if enabled, visualize the detected global and local regions
        if False:
            gt_boxes = gt_boxes.astype(np.int)
            im = im_data.cpu().numpy()[0]
            im = np.transpose(im, (1, 2, 0))[..., ::-1]
            im -= im.min()
            im /= im.max()
            plt.imshow(im.astype(np.float))
            ax = plt.axes()
            ax.add_patch(
                plt.Rectangle((region_g[0, 0], region_g[0, 1]),
                              region_g[0, 2] - region_g[0, 0],
                              region_g[0, 3] - region_g[0, 1],
                              fill=False,
                              edgecolor='red',
                              linewidth=1))

            ax.add_patch(
                plt.Rectangle((region_l[0, 0], region_l[0, 1]),
                              region_l[0, 2] - region_l[0, 0],
                              region_l[0, 3] - region_l[0, 1],
                              fill=False,
                              edgecolor='yellow',
                              linewidth=1))

            ax.add_patch(
                plt.Rectangle((gt_boxes[0, 0], gt_boxes[0, 1]),
                              gt_boxes[0, 2] - gt_boxes[0, 0],
                              gt_boxes[0, 3] - gt_boxes[0, 1],
                              fill=False,
                              edgecolor='green',
                              linewidth=1))
            ax.add_patch(
                plt.Rectangle((gt_boxes[1, 0], gt_boxes[1, 1]),
                              gt_boxes[1, 2] - gt_boxes[1, 0],
                              gt_boxes[1, 3] - gt_boxes[1, 1],
                              fill=False,
                              edgecolor='white',
                              linewidth=1))
            plt.show()

        rois_g = np.zeros((1, proposal_g.shape[0], 5), dtype=np.float32)
        rois_g[0, :, 1:5] = proposal_g[:, :4]
        #rois_g /= 16.
        rois_l = np.zeros((1, proposal_l.shape[0], 5), dtype=np.float32)
        rois_l[0, :, 1:5] = proposal_l[:, :4]
        #rois_l /= 16.
        rois_g = torch.tensor(rois_g, dtype=torch.float).cuda()
        rois_l = torch.tensor(rois_l, dtype=torch.float).cuda()
        self.rois_g.data.resize_(rois_g.size()).copy_(rois_g)
        self.rois_l.data.resize_(rois_l.size()).copy_(rois_l)
        # global region
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(self.rois_g.view(-1, 5),
                                       base_feat.size()[2:],
                                       self.FRCN.grid_size)
            grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                                  3).contiguous()
            pooled_feat_g = self.FRCN.RCNN_roi_crop(base_feat,
                                                    Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_g = F.max_pool2d(pooled_feat_g, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_g = self.FRCN.RCNN_roi_align(base_feat,
                                                     self.rois_g.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_g = self.FRCN.RCNN_roi_pool(base_feat,
                                                    self.rois_g.view(-1, 5))

        # local region
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(self.rois_l.view(-1, 5),
                                       base_feat.size()[2:],
                                       self.FRCN.grid_size)
            grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                                  3).contiguous()
            pooled_feat_l = self.FRCN.RCNN_roi_crop(base_feat,
                                                    Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_l = F.max_pool2d(pooled_feat_l, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_l = self.FRCN.RCNN_roi_align(base_feat,
                                                     self.rois_l.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_l = self.FRCN.RCNN_roi_pool(base_feat,
                                                    self.rois_l.view(-1, 5))

        #print(pooled_feat_g.cpu().detach().numpy().shape)
        x = torch.cat((pooled_feat_g, pooled_feat_l), dim=1)
        #print(x.cpu().detach().numpy().shape)
        x = self.glcc_conv1(x)
        x = F.relu(x)
        x = x.view(-1, self.roipool * self.roipool * 512)
        x = self.glcc_fc1(x)
        x = F.relu(x)
        # use functional dropout so it is disabled in eval mode
        x = F.dropout(x, training=self.training)
        x = self.glcc_fc2(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        glcc_out = self.glcc_fc_out(x)

        if self.training:
            glcc_gt = torch.tensor(glcc_gt, dtype=torch.long).cuda()
            glcc_loss = F.cross_entropy(glcc_out, glcc_gt)
        else:
            glcc_loss = 0.

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, glcc_out, glcc_loss
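
Note: the glcc_* head above concatenates pooled global and local RoI features before classifying the pair. A minimal, self-contained sketch of that pattern follows; the channel width (512), pool size (7), and class count are illustrative assumptions, not values taken from the snippet.

import torch
import torch.nn as nn
import torch.nn.functional as F

class GLCCHead(nn.Module):
    """Global/local fusion head: concat -> 1x1 conv -> two FC+dropout -> logits."""

    def __init__(self, in_channels=512, roipool=7, num_classes=2):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels * 2, in_channels, kernel_size=1)
        self.fc1 = nn.Linear(roipool * roipool * in_channels, 1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.fc_out = nn.Linear(1024, num_classes)
        self.roipool = roipool
        self.in_channels = in_channels

    def forward(self, feat_g, feat_l):
        x = torch.cat((feat_g, feat_l), dim=1)               # (N, 2C, P, P)
        x = F.relu(self.conv1(x))                            # (N, C, P, P)
        x = x.view(-1, self.roipool * self.roipool * self.in_channels)
        x = F.dropout(F.relu(self.fc1(x)), training=self.training)
        x = F.dropout(F.relu(self.fc2(x)), training=self.training)
        return self.fc_out(x)                                # (N, num_classes)

head = GLCCHead()
print(head(torch.randn(4, 512, 7, 7), torch.randn(4, 512, 7, 7)).shape)  # (4, 2)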
Example n. 25
0
    def forward(
        self,
        im_data,
        im_info,
        im_cls_lb,
        gt_boxes,
        num_boxes,
        need_backprop,
        tgt_im_data,
        tgt_im_info,
        tgt_gt_boxes,
        tgt_num_boxes,
        tgt_need_backprop,
    ):

        # force the source/target flags so that the assertion below holds
        if not (need_backprop.detach() == 1 and tgt_need_backprop.detach() == 0):
            need_backprop = torch.Tensor([1]).cuda()
            tgt_need_backprop = torch.Tensor([0]).cuda()

        assert need_backprop.detach() == 1 and tgt_need_backprop.detach() == 0

        batch_size = im_data.size(0)
        im_info = im_info.data  # (size1,size2, image ratio(new image / source image) )
        im_cls_lb = im_cls_lb.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data
        need_backprop = need_backprop.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        cls_feat = self.conv_lst(self.avg_pool(base_feat)).squeeze(-1).squeeze(-1)
        img_cls_loss = nn.BCEWithLogitsLoss()(cls_feat, im_cls_lb)

        # feed base feature map to the RPN to obtain rois
        self.RCNN_rpn.train()
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes
        )

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2))
            )

        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == "align":
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == "pool":
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(
                bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4
            )
            bbox_pred_select = torch.gather(
                bbox_pred_view,
                1,
                rois_label.view(rois_label.size(0), 1, 1).expand(
                    rois_label.size(0), 1, 4
                ),
            )
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(
                bbox_pred, rois_target, rois_inside_ws, rois_outside_ws
            )

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        """ =================== for target =========================="""

        tgt_batch_size = tgt_im_data.size(0)
        tgt_im_info = (
            tgt_im_info.data
        )  # (size1,size2, image ratio(new image / source image) )
        tgt_gt_boxes = tgt_gt_boxes.data
        tgt_num_boxes = tgt_num_boxes.data
        tgt_need_backprop = tgt_need_backprop.data

        # feed image data to base model to obtain base feature map
        tgt_base_feat = self.RCNN_base(tgt_im_data)

        # feed base feature map to the RPN to obtain rois
        self.RCNN_rpn.eval()
        tgt_rois, tgt_rpn_loss_cls, tgt_rpn_loss_bbox = self.RCNN_rpn(
            tgt_base_feat, tgt_im_info, tgt_gt_boxes, tgt_num_boxes
        )

        # no ground-truth refinement is applied to the target branch

        tgt_rois_label = None
        tgt_rois_target = None
        tgt_rois_inside_ws = None
        tgt_rois_outside_ws = None
        tgt_rpn_loss_cls = 0
        tgt_rpn_loss_bbox = 0

        tgt_rois = Variable(tgt_rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == "crop":
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            tgt_grid_xy = _affine_grid_gen(
                tgt_rois.view(-1, 5), tgt_base_feat.size()[2:], self.grid_size
            )
            tgt_grid_yx = torch.stack(
                [tgt_grid_xy.data[:, :, :, 1], tgt_grid_xy.data[:, :, :, 0]], 3
            ).contiguous()
            tgt_pooled_feat = self.RCNN_roi_crop(
                tgt_base_feat, Variable(tgt_grid_yx).detach()
            )
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                tgt_pooled_feat = F.max_pool2d(tgt_pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == "align":
            tgt_pooled_feat = self.RCNN_roi_align(tgt_base_feat, tgt_rois.view(-1, 5))
        elif cfg.POOLING_MODE == "pool":
            tgt_pooled_feat = self.RCNN_roi_pool(tgt_base_feat, tgt_rois.view(-1, 5))

        # feed pooled features to top model
        tgt_pooled_feat = self._head_to_tail(tgt_pooled_feat)

        # keep at most as many target instances as source instances so the
        # instance-level DA tensors line up
        if tgt_pooled_feat.shape[0] > pooled_feat.shape[0]:
            tgt_pooled_feat = tgt_pooled_feat[: pooled_feat.shape[0]]
        """  DA loss   """

        # DA LOSS
        DA_img_loss_cls = 0
        DA_ins_loss_cls = 0

        tgt_DA_img_loss_cls = 0
        tgt_DA_ins_loss_cls = 0

        base_score, base_label = self.RCNN_imageDA(base_feat, need_backprop)

        # Image DA
        base_prob = F.log_softmax(base_score, dim=1)
        DA_img_loss_cls = F.nll_loss(base_prob, base_label)

        instance_sigmoid, same_size_label = self.RCNN_instanceDA(
            pooled_feat, need_backprop
        )
        instance_loss = nn.BCELoss()
        DA_ins_loss_cls = instance_loss(instance_sigmoid, same_size_label)

        # consistency_prob = torch.max(F.softmax(base_score, dim=1),dim=1)[0]
        consistency_prob = F.softmax(base_score, dim=1)[:, 1, :, :]
        consistency_prob = torch.mean(consistency_prob)
        consistency_prob = consistency_prob.repeat(instance_sigmoid.size())

        DA_cst_loss = self.consistency_loss(instance_sigmoid, consistency_prob.detach())

        """  ************** taget loss ****************  """

        tgt_base_score, tgt_base_label = self.RCNN_imageDA(
            tgt_base_feat, tgt_need_backprop
        )

        # Image DA
        tgt_base_prob = F.log_softmax(tgt_base_score, dim=1)
        tgt_DA_img_loss_cls = F.nll_loss(tgt_base_prob, tgt_base_label)

        tgt_instance_sigmoid, tgt_same_size_label = self.RCNN_instanceDA(
            tgt_pooled_feat, tgt_need_backprop
        )
        tgt_instance_loss = nn.BCELoss()

        tgt_DA_ins_loss_cls = tgt_instance_loss(
            tgt_instance_sigmoid, tgt_same_size_label
        )

        tgt_consistency_prob = F.softmax(tgt_base_score, dim=1)[:, 0, :, :]
        tgt_consistency_prob = torch.mean(tgt_consistency_prob)
        tgt_consistency_prob = tgt_consistency_prob.repeat(tgt_instance_sigmoid.size())

        tgt_DA_cst_loss = self.consistency_loss(
            tgt_instance_sigmoid, tgt_consistency_prob.detach()
        )

        return (
            rois,
            cls_prob,
            bbox_pred,
            img_cls_loss,
            rpn_loss_cls,
            rpn_loss_bbox,
            RCNN_loss_cls,
            RCNN_loss_bbox,
            rois_label,
            DA_img_loss_cls,
            DA_ins_loss_cls,
            tgt_DA_img_loss_cls,
            tgt_DA_ins_loss_cls,
            DA_cst_loss,
            tgt_DA_cst_loss,
        )
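
Note: self.consistency_loss is not defined in the snippet. In the Domain Adaptive Faster R-CNN formulation it is usually an L2 penalty tying each instance-level domain prediction to the (detached) mean image-level domain probability; a minimal sketch under that assumption:

import torch
import torch.nn as nn

class ConsistencyLoss(nn.Module):
    """L2 consistency between instance-level and image-level domain scores."""

    def forward(self, instance_sigmoid, consistency_prob):
        # consistency_prob is the mean image-level probability already
        # broadcast to the instance shape (and detached by the caller)
        return torch.mean((instance_sigmoid - consistency_prob) ** 2)

loss_fn = ConsistencyLoss()
ins = torch.rand(128, 1)                      # per-RoI domain predictions
img = torch.full((128, 1), 0.6)               # repeated image-level mean
print(loss_fn(ins, img.detach()))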
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        # base_feat = self.RCNN_base(im_data)
        base_feat_conv3 = self.RCNN_base(im_data)
        base_feat_conv4 = self.RCNN_conv4(base_feat_conv3)
        base_feat_conv5 = self.RCNN_conv5(base_feat_conv4)
        ############
        x_o3 = self.conv3(base_feat_conv3)
        x_o4 = self.conv4(base_feat_conv4)
        x_o5 = self.conv5(base_feat_conv5)
        x_o = x_o3 + x_o4 + x_o5

        x_e1 = self.e1(x_o)
        x_e2 = self.e2(x_e1)
        x = self.e3(x_e2)
        x = self.d1_deconv(x, output_size=x_e2.size())
        x = self.d1(x)
        x = self.d2_deconv(torch.cat([x_e2, x], 1),
                           output_size=base_feat_conv5.size())
        x = self.d2(x)
        x = self.d3(torch.cat([x_e1, x], 1))
        base_feat = self.d4(x)
        #############

        # feed base feature map to the RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, dim=1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
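
Note: the torch.gather call used above (and in the other forwards here) picks, for each RoI, the 4 box deltas belonging to its ground-truth class. A standalone sketch of that indexing with toy sizes:

import torch

num_rois, num_classes = 3, 5
bbox_pred = torch.arange(num_rois * num_classes * 4, dtype=torch.float32)
bbox_pred = bbox_pred.view(num_rois, num_classes * 4)   # as RCNN_bbox_pred output
rois_label = torch.tensor([1, 0, 4])                    # class of each RoI

bbox_pred_view = bbox_pred.view(num_rois, num_classes, 4)
idx = rois_label.view(num_rois, 1, 1).expand(num_rois, 1, 4)
bbox_pred_select = torch.gather(bbox_pred_view, 1, idx).squeeze(1)
print(bbox_pred_select.shape)  # torch.Size([3, 4])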
Example n. 27
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes, need_backprop=None, dc_label=None):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)
        if self.training: # target:0 / source:1 / distorted:2
            DA_loss_img = 0.1 * self.Dis(GradReverse.apply(base_feat), dc_label)
        else:
            DA_loss_img = 0

        # feed base feature map to the RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes, need_backprop)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            if need_backprop.numpy():
                roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
                rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
                # print(rois_label)
                rois_label = Variable(rois_label.view(-1).long())
                rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
                rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
                rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
            else:
                rois_label = None
                rois_target = None
                rois_inside_ws = None
                rois_outside_ws = None
                rpn_loss_cls = 0
                rpn_loss_bbox = 0
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
        rois = Variable(rois)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            if need_backprop.numpy():
                # select the corresponding columns according to roi labels
                bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
                bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
                bbox_pred = bbox_pred_select.squeeze(1) # gathers rois of the correspond class via rois_label

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, dim=1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            if need_backprop.numpy():
                # compute classification and bounding-box regression losses only for source data
                # classification loss
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

                # bounding box regression L1 loss
                RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
            else:
                RCNN_loss_cls = Variable(torch.zeros(1).float().cuda())
                RCNN_loss_bbox = Variable(torch.zeros(1).float().cuda())
                rpn_loss_cls = Variable(torch.zeros(1).float().cuda())
                rpn_loss_bbox = Variable(torch.zeros(1).float().cuda())

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, DA_loss_img
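
Note: GradReverse.apply above relies on a gradient reversal layer defined outside the snippet. A minimal implementation compatible with the one-argument call above (any lambd scaling would be an extension):

import torch
from torch.autograd import Function

class GradReverse(Function):
    """Identity in the forward pass, negated gradient in the backward pass."""

    @staticmethod
    def forward(ctx, x):
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.neg()

x = torch.randn(2, 3, requires_grad=True)
GradReverse.apply(x).sum().backward()
print(x.grad)  # every entry is -1.0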
Example n. 28
0
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                target=False,
                eta=1.0):

        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data
        lossQ = -1

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)
        # feed base feature map to the RPN to obtain rois
        #print("target is ",target)
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes, target)
        #print("rois.shape:",rois.shape)
        # if in the training phase, use ground-truth bboxes for refining
        if self.training and not target:
            #print("source traning---------------------------")
            #print("batch_size:",batch_size)
            #print("gt_boxes.shape:",gt_boxes.shape)
            #print("num_boxes:",num_boxes.data)
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            lossQ = -1

        rois = Variable(rois)
        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        #print("pooled_feat before _head_to_tail:",pooled_feat.shape)
        if self.context:
            d_instance, _ = self.netD_pixel(
                grad_reverse(pooled_feat, lambd=eta))
            #if target:
            #d_instance, _ = self.netD_pixel(grad_reverse(pooled_feat, lambd=eta))
            #return d_pixel#, diff
            d_score_total, feat = self.netD_pixel(pooled_feat.detach())
        else:
            d_score_total = self.netD_pixel(pooled_feat.detach())
            d_instance = self.netD_pixel(grad_reverse(pooled_feat, lambd=eta))
            #if target:
            #return d_pixel#,diff

        #d_score_total, _ = self.netD_pixel(pooled_feat.detach())
        #print("d_score_total.shape",d_score_total.shape)
        #print("pooled_feat.shape:",pooled_feat.shape)
        d_instance_q = d_instance.split(128, 0)

        d_score_total_q = d_score_total.split(128, 0)
        d_score_total_qs = []
        for img in range(batch_size):
            temp = torch.mean(d_score_total_q[img], dim=3)
            d_score_total_qs.append(torch.mean(temp, dim=2))

        #d_score_total = torch.mean(d_score_total,dim=3)
        #d_score_total = torch.mean(d_score_total,dim=2)
        pooled_feat = self._head_to_tail(pooled_feat)

        #print("pooled_feat.shape:",pooled_feat.shape)

        if self.training and self.S_agent:
            pooled_feat_s = pooled_feat.split(128, 0)
            for img in range(batch_size):
                pooled_feat_d = pooled_feat_s[img]
                #print("------------------begain selecting in the source-----------------------")
                select_iter = int(pooled_feat_d.shape[0] / self.candidate_num)
                total_index = list(range(0, pooled_feat_d.shape[0]))
                np.random.shuffle(total_index)
                select_index = []
                for eposide in range(select_iter):
                    #print("#################################begain batch-%d-th the %d-th eposide##################################" % (img,eposide))
                    select_list = list(range(0, self.candidate_num))
                    batch_idx = total_index[eposide *
                                            self.candidate_num:(eposide + 1) *
                                            self.candidate_num]
                    state = pooled_feat_d[batch_idx]
                    #print("state.shape:",state.shape)
                    d_score = d_score_total_qs[img][batch_idx]
                    #print("d_score.shape:",d_score.shape)
                    for it in range(self.select_num):
                        #print("#########begain the %d-th selection################" % (it))
                        epsilon = self.epsilon_by_epoch(self.iter_dqn)
                        action_index = self.current_model.act(
                            state, epsilon, select_list)
                        #print("action_index:",action_index)
                        #action_episode.append(action_index)
                        try:
                            select_list.remove(action_index)
                        except ValueError:
                            # the chosen action was already removed
                            print("select_list:", select_list)
                            print("action_index:", action_index)
                            continue
                        #print("the %d-th select, action_index is %d"%(it,action_index))
                        if d_score[action_index] > self.ts:
                            reward = -1
                        else:
                            reward = 1
                        #print("reward:",reward)
                        next_state = state.clone().detach()
                        next_state[action_index] = torch.zeros(
                            1, next_state.shape[1])
                        if it == (self.select_num - 1):
                            done = 1
                        else:
                            done = 0
                        self.replay_buffer.push(state, action_index, reward,
                                                next_state, done, select_list)
                        self.iter_dqn = self.iter_dqn + 1
                        state = next_state
                    select_index = select_index + [
                        batch_idx[i] for i in select_list
                    ]
                if len(self.replay_buffer) > cfg.BATCH_SIZE_DQN:
                    lossQ = DQN.compute_td_loss(self.current_model,
                                                self.target_model,
                                                self.replay_buffer,
                                                cfg.BATCH_SIZE_DQN)
                if np.mod(self.iter_dqn, cfg.replace_target_iter) == 0:
                    DQN.update_target(self.current_model, self.target_model)
                if img == 0:
                    d_instance_refine = d_instance_q[img][select_index]
                else:
                    d_instance_refine = torch.cat(
                        (d_instance_refine, d_instance_q[img][select_index]),
                        0)
        pooled_feat_original = pooled_feat.clone().detach()
        if self.context:
            feat = feat.view(feat.size(0), -1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic and not target:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        #print("pooled_feat.shape in faster_rcnn_global_pixel_instance:",pooled_feat.shape)
        cls_score = self.RCNN_cls_score(pooled_feat)

        cls_prob = F.softmax(cls_score, 1)
        #print("cls_prob is ",cls_prob.shape)

        if self.training and target and self.T_agent:
            pooled_feat_t = pooled_feat_original.split(128, 0)
            for img in range(batch_size):
                pooled_feat_d = pooled_feat_t[img]

                select_iter_T = int(pooled_feat_d.shape[0] /
                                    self.candidate_num)
                #print("select_iter_T:",select_iter_T)
                total_index_T = list(range(0, pooled_feat_d.shape[0]))
                np.random.shuffle(total_index_T)
                #print("gt_label:",gt_label)
                #print("total_index:",len(total_index))
                select_index_T = []
                for eposide_T in range(select_iter_T):
                    select_list_T = list(range(0, self.candidate_num))
                    batch_idx_T = total_index_T[eposide_T *
                                                self.candidate_num:(eposide_T +
                                                                    1) *
                                                self.candidate_num]
                    state_T = pooled_feat_d[batch_idx_T]
                    d_score_T = d_score_total_qs[img][batch_idx_T]
                    #print("label_pre:",label_pre)
                    for it in range(self.select_num):
                        epsilon_T = self.epsilon_by_epoch_T(self.iter_dqn_T)
                        action_index_T = self.current_model_T.act(
                            state_T, epsilon_T, select_list_T)
                        #select_list_T.remove(action_index_T)
                        try:
                            select_list_T.remove(action_index_T)
                        except ValueError:
                            # the chosen action was already removed
                            print("select_list_T:", select_list_T)
                            print("action_index:", action_index_T)
                            continue
                        #print("label_pre[action_index_T]:",label_pre[action_index_T])
                        #print("torch.eq(gt_label,label_pre[action_index_T]):",torch.eq(gt_label,label_pre[action_index_T]))
                        if d_score_T[action_index_T] > self.tt:
                            reward = 1
                        else:
                            reward = -1
                        #print("D_score:",d_score_T[action_index_T][1],"reward:",reward)
                        next_state_T = state_T.clone().detach()
                        next_state_T[action_index_T] = torch.zeros(
                            1, next_state_T.shape[1])
                        if it == (self.select_num - 1):
                            done = 1
                        else:
                            done = 0
                        self.replay_buffer_T.push(state_T, action_index_T,
                                                  reward, next_state_T, done,
                                                  select_list_T)
                        self.iter_dqn_T = self.iter_dqn_T + 1
                        state_T = next_state_T
                        #print("select_list_T:",select_list_T)
                        #if len(self.replay_buffer_T)>cfg.BATCH_SIZE_DQN:
                        #    lossQ = DQN.compute_td_loss(self.current_model_T,self.target_model_T,self.replay_buffer_T,cfg.BATCH_SIZE_DQN)
                        #if np.mod(self.iter_dqn_T,cfg.replace_target_iter)==0:
                        #    DQN.update_target(self.current_model_T,self.target_model_T)
                    select_index_T = select_index_T + [
                        batch_idx_T[i] for i in select_list_T
                    ]
                if len(self.replay_buffer_T) > cfg.BATCH_SIZE_DQN:
                    lossQ = DQN.compute_td_loss(self.current_model_T,
                                                self.target_model_T,
                                                self.replay_buffer_T,
                                                cfg.BATCH_SIZE_DQN)
                if np.mod(self.iter_dqn_T, cfg.replace_target_iter) == 0:
                    DQN.update_target(self.current_model_T,
                                      self.target_model_T)
                #d_instance = d_instance[select_index_T]
                if img == 0:
                    d_instance_refine = d_instance_q[img][select_index_T]
                else:
                    d_instance_refine = torch.cat(
                        (d_instance_refine, d_instance_q[img][select_index_T]),
                        0)

        if target:
            return d_instance_refine, lossQ

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        if self.S_agent:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_instance_refine, lossQ  #,diff
        else:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_instance, lossQ
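
Note: self.epsilon_by_epoch / self.epsilon_by_epoch_T are not defined in the snippet. DQN-style agents typically use an exponentially decaying epsilon-greedy schedule; a hypothetical sketch:

import math

def make_epsilon_schedule(eps_start=1.0, eps_final=0.01, eps_decay=500):
    # returns a callable mapping a step counter to an exploration rate
    def epsilon_by_step(step):
        return eps_final + (eps_start - eps_final) * math.exp(-step / eps_decay)
    return epsilon_by_step

epsilon_by_epoch = make_epsilon_schedule()
print(epsilon_by_epoch(0))     # ~1.0: explore almost always at the start
print(epsilon_by_epoch(1000))  # ~0.14: mostly greedy later on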
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                flag=True,
                rpn_bs=128):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # conv1 feature map
        conv1_feat = self.Conv1(im_data)

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(conv1_feat)

        # feed base feature map to the RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes,
                                                 rpn_bs)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        ################################################
        # define generator network
        ################################################
        if not flag:
            downsample_conv1_feat = self.downsample(conv1_feat)
            conv1_pooled_feat = self.RCNN_roi_pool_conv1(
                downsample_conv1_feat, rois.view(-1, 5))
            residual_feat = self.residualblock(conv1_pooled_feat)
            pooled_feat = residual_feat + pooled_feat

        # feed pooled features to top model
        rcnn_pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(rcnn_pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(rcnn_pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)
            rois_label = torch.unsqueeze(rois_label, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
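
Note: the torch.unsqueeze(..., 0) applied to the scalar losses just before the return is a common pattern so that nn.DataParallel can concatenate the per-GPU scalars along dim 0. A minimal illustration (the DataParallel wrapping itself happens outside this snippet):

import torch

loss_gpu0 = torch.tensor(0.7).unsqueeze(0)    # shape (1,) from replica 0
loss_gpu1 = torch.tensor(0.9).unsqueeze(0)    # shape (1,) from replica 1
gathered = torch.cat([loss_gpu0, loss_gpu1])  # what DataParallel hands back
print(gathered.mean())                        # reduce on the main device -> 0.8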
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                target=False,
                test=False,
                eta=1.0,
                hints=False):
        if test:
            # temporarily switch off training-mode behaviour for this pass
            self.training = False
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        if self.dc == 'swda':
            if self.lc:
                d_pixel, _ = self.netD_pixel(
                    grad_reverse(base_feat1, lambd=eta))
                # print(d_pixel)
                if not target:
                    _, feat_pixel = self.netD_pixel(base_feat1.detach())
            else:
                d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
        base_feat = self.RCNN_base2(base_feat1)
        if self.dc == 'vanilla':
            domain = self.netD_dc(grad_reverse(base_feat, lambd=eta))
            if target:
                return None, domain
        elif self.dc == 'swda':
            if self.gc:
                domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
                if target:
                    return d_pixel, domain_p
                _, feat = self.netD(base_feat.detach())
            else:
                domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
                if target:
                    return d_pixel, domain_p
        # feed base feature map to the RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        #feat_pixel = torch.zeros(feat_pixel.size()).cuda()
        if self.lc:
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
        if self.gc:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)
        if self.conf:
            # confidence
            confidence = torch.sigmoid(self.netD_confidence(pooled_feat))
            # Make sure we don't have any numerical instability
            eps = 1e-12
            pred_original = torch.clamp(cls_prob, 0. + eps, 1. - eps)
            confidence = torch.clamp(confidence, 0. + eps, 1. - eps)
            confidence_loss = (-torch.log(confidence))

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            if self.conf and hints:
                # Randomly set half of the confidences to 1 (i.e. no hints)
                b = torch.bernoulli(
                    torch.Tensor(confidence.size()).uniform_(0, 1)).cuda()
                conf = confidence * b + (1 - b)
                labels_onehot = encode_onehot(rois_label,
                                              pred_original.size(1))
                pred_new = pred_original * conf.expand_as(pred_original) + \
                    labels_onehot * (1 - conf.expand_as(labels_onehot))
                pred_new = torch.log(pred_new)
                RCNN_loss_cls = F.nll_loss(pred_new, rois_label)

            else:
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        if test:
            # restore the training flag toggled at the top of forward
            self.training = True
        if self.dc == 'swda' and self.conf is None:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, None, None
        elif self.dc == 'vanilla' and self.conf is None:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, domain, None, None
        elif self.conf and self.dc is None:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, None, confidence_loss, confidence
        elif self.conf and self.dc == "swda":
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, confidence_loss, confidence
        elif self.conf and self.dc == "vanilla":
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, domain, confidence_loss, confidence
        else:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, None, None, None
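
Note: encode_onehot used in the hints branch is not defined in the snippet; a minimal compatible helper would look like:

import torch

def encode_onehot(labels, num_classes):
    # labels: LongTensor (N,) -> one-hot FloatTensor (N, num_classes)
    onehot = torch.zeros(labels.size(0), num_classes, device=labels.device)
    return onehot.scatter_(1, labels.view(-1, 1), 1.0)

print(encode_onehot(torch.tensor([0, 2]), 3))
# tensor([[1., 0., 0.],
#         [0., 0., 1.]])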
Example n. 31
0
    def forward(
        self,
        src_im_data,
        src_im_info,
        src_im_cls_lb,
        src_gt_boxes,
        src_num_boxes,
        src_need_backprop,
        _tgt_im_data,
        _tgt_im_info,
        _tgt_gt_boxes,
        _tgt_num_boxes,
        _tgt_need_backprop,
        ft_im_data,
        ft_im_info,
        ft_im_cls_lb,
        ft_gt_boxes,
        ft_num_boxes,
        ft_need_backprop,
        fs_im_data,
        fs_im_info,
        fs_gt_boxes,
        fs_num_boxes,
        fs_need_backprop,
        weight_value=1.0,
    ):


        # concatenate src_im_data and ft_im_data
        im_data = torch.cat([src_im_data, ft_im_data], dim=0)
        im_info = torch.cat([src_im_info, ft_im_info], dim=0)
        im_cls_lb = torch.cat([src_im_cls_lb, ft_im_cls_lb], dim=0)
        gt_boxes = torch.cat([src_gt_boxes, ft_gt_boxes], dim=0)
        num_boxes = torch.cat([src_num_boxes, ft_num_boxes], dim=0)
        need_backprop = torch.cat([src_need_backprop, ft_need_backprop], dim=0)

        batch_size = im_data.size(0)
        im_info = im_info.data
        im_cls_lb =  im_cls_lb.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data
        need_backprop = need_backprop.data

        base_feat = self.RCNN_base(im_data)
        cls_feat = self.conv_lst(self.avg_pool(base_feat)).squeeze(-1).squeeze(-1)
        img_cls_loss = nn.BCEWithLogitsLoss()(cls_feat, im_cls_lb)

        #for image-level contra loss
        contra_src_loss = self.compute_contra_loss(base_feat)


        # concatenate _tgt_im_data and fs_im_data
        tgt_im_data = torch.cat([_tgt_im_data, fs_im_data], dim=0)
        tgt_im_info = torch.cat([_tgt_im_info, fs_im_info], dim=0)
        tgt_gt_boxes = torch.cat([_tgt_gt_boxes, fs_gt_boxes], dim=0)
        tgt_num_boxes = torch.cat([_tgt_num_boxes, fs_num_boxes], dim=0)
        tgt_need_backprop = torch.cat([_tgt_need_backprop, fs_need_backprop], dim=0)

        tgt_batch_size = tgt_im_data.size(0)
        tgt_im_info = tgt_im_info.data
        tgt_gt_boxes = tgt_gt_boxes.data
        tgt_num_boxes = tgt_num_boxes.data
        tgt_need_backprop = tgt_need_backprop.data

        # feed base feature map to the RPN to obtain rois
        self.RCNN_rpn.train()
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes
        )

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2))
            )

        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == "align":
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == "pool":
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(
                bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4
            )
            bbox_pred_select = torch.gather(
                bbox_pred_view,
                1,
                rois_label.view(rois_label.size(0), 1, 1).expand(
                    rois_label.size(0), 1, 4
                ),
            )
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0
        # ins_contra_loss = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(
                bbox_pred, rois_target, rois_inside_ws, rois_outside_ws
            )

        # for probability invariance: evaluate the same random subset of rois
        # on every image in the batch
        invar_num = 60
        invar_index = np.random.choice(rois.size(1), size=invar_num)
        invar_rois = torch.zeros((rois.size(0) * invar_num, rois.size(2))).cuda()
        for i in range(batch_size):
            for j in range(invar_num):
                invar_rois[i*invar_num + j] = rois[i][invar_index[j]]
        invar_rois = torch.cat([invar_rois.unsqueeze(0), invar_rois.unsqueeze(0)], dim=0)

        if cfg.POOLING_MODE == "align":
            invar_pooled_feat = self.RCNN_roi_align(base_feat, invar_rois.view(-1, 5))
        elif cfg.POOLING_MODE == "pool":
            invar_pooled_feat = self.RCNN_roi_pool(base_feat, invar_rois.view(-1, 5))

        # feed pooled features to top model
        invar_pooled_feat = self._head_to_tail(invar_pooled_feat)
        # compute object classification probability
        invar_cls_score = self.RCNN_cls_score(invar_pooled_feat)
        invar_cls_prob = F.softmax(invar_cls_score, 1)
        invar_cls_prob = invar_cls_prob.view(batch_size, -1, invar_cls_prob.size(1))
        s_invar_cls_prob = invar_cls_prob[:1].squeeze(0)
        ft_invar_cls_prob = invar_cls_prob[1:].squeeze(0)
        invar_kdl_loss = self.KLDistance(s_invar_cls_prob, ft_invar_cls_prob)
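        # (KLDistance is defined outside this snippet; presumably it measures
        # the divergence between the two probability sets so the source image
        # and its ft counterpart agree on the shared rois.)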

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        """ =================== for target =========================="""

        tgt_batch_size = tgt_im_data.size(0)
        tgt_im_info = (
            tgt_im_info.data
        )  # (size1,size2, image ratio(new image / source image) )
        tgt_gt_boxes = tgt_gt_boxes.data
        tgt_num_boxes = tgt_num_boxes.data
        tgt_need_backprop = tgt_need_backprop.data

        # feed image data to base model to obtain base feature map
        tgt_base_feat = self.RCNN_base(tgt_im_data)

        contra_tgt_loss = self.compute_contra_loss(tgt_base_feat)


        tgt_img_cls_feat = self.conv_lst(tgt_base_feat)
        tgt_img_cls_feat = F.softmax(tgt_img_cls_feat, dim=1)
        tgt_img_cls_loss = self.entropy_loss(tgt_img_cls_feat)

        # add new code
        tgt_image_cls_feat = (
            self.conv_lst(self.avg_pool(tgt_base_feat)).squeeze(-1).squeeze(-1).detach()
        )
        # tgt_image_cls_feat = F.sigmoid(tgt_image_cls_feat[0]).detach()

        tgt_image_cls_feat = torch.sigmoid(tgt_image_cls_feat).detach()
        # feed base feature map to the RPN to obtain rois
        self.RCNN_rpn.eval()
        tgt_rois, tgt_rpn_loss_cls, tgt_rpn_loss_bbox = self.RCNN_rpn(
            tgt_base_feat, tgt_im_info, tgt_gt_boxes, tgt_num_boxes
        )

        # no ground-truth refinement is applied to the target branch

        tgt_rois_label = None
        tgt_rois_target = None
        tgt_rois_inside_ws = None
        tgt_rois_outside_ws = None
        tgt_rpn_loss_cls = 0
        tgt_rpn_loss_bbox = 0

        tgt_rois = Variable(tgt_rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == "crop":
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            tgt_grid_xy = _affine_grid_gen(
                tgt_rois.view(-1, 5), tgt_base_feat.size()[2:], self.grid_size
            )
            tgt_grid_yx = torch.stack(
                [tgt_grid_xy.data[:, :, :, 1], tgt_grid_xy.data[:, :, :, 0]], 3
            ).contiguous()
            tgt_pooled_feat = self.RCNN_roi_crop(
                tgt_base_feat, Variable(tgt_grid_yx).detach()
            )
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                tgt_pooled_feat = F.max_pool2d(tgt_pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == "align":
            tgt_pooled_feat = self.RCNN_roi_align(tgt_base_feat, tgt_rois.view(-1, 5))
        elif cfg.POOLING_MODE == "pool":
            tgt_pooled_feat = self.RCNN_roi_pool(tgt_base_feat, tgt_rois.view(-1, 5))

        # feed pooled features to top model
        tgt_pooled_feat = self._head_to_tail(tgt_pooled_feat)

        # add new code
        tgt_cls_score = self.RCNN_cls_score(tgt_pooled_feat).detach()
        tgt_prob = F.softmax(tgt_cls_score, 1).detach()
        tgt_pre_label = tgt_prob.argmax(1).detach()




        # for probability invariance in the target domain, zf
        tgt_invar_num = 60
        tgt_invar_index = np.random.choice(tgt_rois.size(1), size=tgt_invar_num)
        tgt_invar_rois = torch.zeros((tgt_rois.size(0) * tgt_invar_num, tgt_rois.size(2))).cuda()
        for i in range(tgt_batch_size):
            for j in range(tgt_invar_num):
                tgt_invar_rois[i*tgt_invar_num + j] = tgt_rois[i][tgt_invar_index[j]]
        tgt_invar_rois = torch.cat([tgt_invar_rois.unsqueeze(0), tgt_invar_rois.unsqueeze(0)], dim=0)
        
        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == "align":
            tgt_invar_pooled_feat = self.RCNN_roi_align(tgt_base_feat, tgt_invar_rois.view(-1, 5))
        elif cfg.POOLING_MODE == "pool":
            tgt_invar_pooled_feat = self.RCNN_roi_pool(tgt_base_feat, tgt_invar_rois.view(-1, 5))

        # feed pooled features to top model
        tgt_invar_pooled_feat = self._head_to_tail(tgt_invar_pooled_feat)
        # compute object classification probability
        tgt_invar_cls_score = self.RCNN_cls_score(tgt_invar_pooled_feat)
        tgt_invar_cls_prob = F.softmax(tgt_invar_cls_score, 1)
        tgt_invar_cls_prob = tgt_invar_cls_prob.view(tgt_batch_size, -1, tgt_invar_cls_prob.size(1))
        t_invar_cls_prob = tgt_invar_cls_prob[:1].squeeze(0)
        fs_invar_cls_prob = tgt_invar_cls_prob[1:].squeeze(0)
        tgt_invar_kdl_loss = self.KLDistance(t_invar_cls_prob, fs_invar_cls_prob)


        """  DA loss   """

        # DA LOSS
        DA_img_loss_cls = 0
        DA_ins_loss_cls = 0

        tgt_DA_img_loss_cls = 0
        tgt_DA_ins_loss_cls = 0

        base_score, local_base_score, base_label = self.RCNN_imageDA(base_feat, need_backprop)

        # Image DA
        base_prob = F.log_softmax(base_score, dim=1)
        DA_img_loss_cls = F.nll_loss(base_prob, base_label)

        # image-level DA with per-class local discriminators
        local_DA_img_loss_cls = 0
        for i in range(self.n_classes - 1):
            local_base_prob_i = F.log_softmax(local_base_score[i], dim=1)
            local_DA_img_loss_cls_i = F.nll_loss(local_base_prob_i, base_label)
            local_DA_img_loss_cls += local_DA_img_loss_cls_i

        local_DA_img_loss_cls = local_DA_img_loss_cls / (self.n_classes - 1)

        instance_sigmoid, same_size_label = self.RCNN_instanceDA(
            pooled_feat, need_backprop
        )
        instance_loss = nn.BCELoss()
        DA_ins_loss_cls = instance_loss(instance_sigmoid, same_size_label)


        # image/instance consistency, computed per image in the batch
        DA_cst_loss = 0
        consistency_prob = F.softmax(base_score, dim=1)[:, 1, :, :]
        da_instance_sigmoid = instance_sigmoid.view(batch_size, -1, 1)
        for i in range(batch_size):
            consistency_prob_i = torch.mean(consistency_prob[i])
            da_instance_sigmoid_i = da_instance_sigmoid[i]
            consistency_prob_i = consistency_prob_i.repeat(da_instance_sigmoid_i.size())
            DA_cst_loss_i = self.consistency_loss(da_instance_sigmoid_i, consistency_prob_i.detach())

            DA_cst_loss += DA_cst_loss_i
        DA_cst_loss = DA_cst_loss / batch_size
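        # i.e. each instance-level domain prediction is regressed toward the
        # mean image-level domain probability of its own image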


        """  ************** taget loss ****************  """

        tgt_base_score, tgt_local_base_score, tgt_base_label = self.RCNN_imageDA(
            tgt_base_feat, tgt_need_backprop
        )

        # Image DA
        tgt_base_prob = F.log_softmax(tgt_base_score, dim=1)
        tgt_DA_img_loss_cls = F.nll_loss(tgt_base_prob, tgt_base_label)

        tgt_instance_sigmoid, tgt_same_size_label = self.RCNN_instanceDA(
            tgt_pooled_feat, tgt_need_backprop
        )

        # image-level DA with per-class local discriminators (target domain)
        tgt_local_DA_img_loss_cls = 0
        for i in range(self.n_classes - 1):
            tgt_local_base_prob_i = F.log_softmax(tgt_local_base_score[i], dim=1)
            tgt_local_DA_img_loss_cls_i = F.nll_loss(tgt_local_base_prob_i, tgt_base_label)
            tgt_local_DA_img_loss_cls += tgt_local_DA_img_loss_cls_i

        tgt_local_DA_img_loss_cls = tgt_local_DA_img_loss_cls / (self.n_classes - 1)

        # weight each target instance by how far its ROI pseudo-label
        # probability deviates from the image-level class prediction
        target_weight = []
        tgt_rois_num_each = int(len(tgt_pre_label) / tgt_batch_size)
        tgt_image_cls_feat_index = -1
        for i in range(len(tgt_pre_label)):
            # advance to the next image's class feature every tgt_rois_num_each ROIs
            if i % tgt_rois_num_each == 0:
                tgt_image_cls_feat_index += 1

            label_i = tgt_pre_label[i].item()
            if label_i > 0:
                diff_value = torch.exp(
                    weight_value
                    * torch.abs(tgt_image_cls_feat[tgt_image_cls_feat_index][label_i - 1] - tgt_prob[i][label_i])
                ).item()
                target_weight.append(diff_value)
            else:
                target_weight.append(1.0)

        tgt_instance_loss = nn.BCELoss(
            weight=torch.Tensor(target_weight).view(-1, 1).cuda()
        )

        tgt_DA_ins_loss_cls = tgt_instance_loss(
            tgt_instance_sigmoid, tgt_same_size_label
        )
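        # assuming a positive weight_value (defined earlier in the model),
        # instances whose pseudo-label probability disagrees most with the
        # image-level prediction receive the largest weights in the
        # adversarial instance loss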


        # image/instance consistency for the target domain, per image
        tgt_DA_cst_loss = 0
        tgt_consistency_prob = F.softmax(tgt_base_score, dim=1)[:, 0, :, :]
        tgt_da_instance_sigmoid = tgt_instance_sigmoid.view(tgt_batch_size, -1, 1)
        for i in range(tgt_batch_size):
            tgt_consistency_prob_i = torch.mean(tgt_consistency_prob[i])
            tgt_da_instance_sigmoid_i = tgt_da_instance_sigmoid[i]
            tgt_consistency_prob_i = tgt_consistency_prob_i.repeat(tgt_da_instance_sigmoid_i.size())
            tgt_DA_cst_loss_i = self.consistency_loss(tgt_da_instance_sigmoid_i, tgt_consistency_prob_i.detach())

            tgt_DA_cst_loss += tgt_DA_cst_loss_i
        tgt_DA_cst_loss = tgt_DA_cst_loss / tgt_batch_size

        return (
            rois,
            cls_prob,
            bbox_pred,
            img_cls_loss,
            tgt_img_cls_loss,
            contra_src_loss,
            contra_tgt_loss,
            rpn_loss_cls,
            rpn_loss_bbox,
            RCNN_loss_cls,
            RCNN_loss_bbox,
            rois_label,
            invar_kdl_loss,
            tgt_invar_kdl_loss,
            DA_img_loss_cls,
            local_DA_img_loss_cls,
            DA_ins_loss_cls,
            tgt_DA_img_loss_cls,
            tgt_local_DA_img_loss_cls,
            tgt_DA_ins_loss_cls,
            DA_cst_loss,
            tgt_DA_cst_loss,
        )
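A caller has to reduce this tuple to a single scalar before backpropagation. The sketch below is illustrative only: the names `outputs`, `optimizer`, and the placeholder weight `da_lambda` on the adaptation terms are assumptions, not values taken from this listing.

    # a minimal sketch, assuming `outputs` holds the tuple returned above
    (rois, cls_prob, bbox_pred, img_cls_loss, tgt_img_cls_loss,
     contra_src_loss, contra_tgt_loss, rpn_loss_cls, rpn_loss_bbox,
     RCNN_loss_cls, RCNN_loss_bbox, rois_label, invar_kdl_loss,
     tgt_invar_kdl_loss, DA_img_loss_cls, local_DA_img_loss_cls,
     DA_ins_loss_cls, tgt_DA_img_loss_cls, tgt_local_DA_img_loss_cls,
     tgt_DA_ins_loss_cls, DA_cst_loss, tgt_DA_cst_loss) = outputs

    da_lambda = 0.1  # hypothetical weight on the adaptation terms
    loss = (rpn_loss_cls.mean() + rpn_loss_bbox.mean()
            + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            + img_cls_loss + tgt_img_cls_loss
            + contra_src_loss + contra_tgt_loss
            + invar_kdl_loss + tgt_invar_kdl_loss
            + da_lambda * (DA_img_loss_cls + local_DA_img_loss_cls
                           + DA_ins_loss_cls + tgt_DA_img_loss_cls
                           + tgt_local_DA_img_loss_cls + tgt_DA_ins_loss_cls
                           + DA_cst_loss + tgt_DA_cst_loss))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()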
    def forward(self, im_data, im_info, gt_boxes, num_boxes):

        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)
        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws, gt_rois = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)

        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)
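            # e.g. with two classes bbox_pred is (N, 8); a ROI labelled 1
            # keeps columns 4:8, leaving one (N, 4) class-specific delta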

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, dim=1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:

            # classification loss with hard-negative mining: keep every
            # positive ROI but only the hardest 30% of the negatives
            loss_cls = torch.Tensor(rois_label.size(0)).cuda()
            for i in range(rois_label.shape[0]):
                loss_cls[i] = F.cross_entropy(cls_score[i].view(1, 2),
                                              rois_label[i].view(1))

            neg_loss = loss_cls[rois_label == 0]
            hard_neg = neg_loss.sort(0, True)[0][:int(neg_loss.size(0) * 3 / 10)]
            RCNN_loss_cls = torch.mean(
                torch.cat((hard_neg, loss_cls[rois_label == 1]), 0))

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)
            #import pdb
            #pdb.set_trace()

            # repulsion terms: RepGT pushes predictions away from non-target
            # ground truths, RepBox pushes overlapping predictions apart
            RCNN_loss_repgt, RCNN_loss_repbox = repulsion(
                rois, bbox_pred, gt_rois, rois_inside_ws, rois_outside_ws)

            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)
            RCNN_loss_repgt = torch.unsqueeze(RCNN_loss_repgt, 0)
            RCNN_loss_repbox = torch.unsqueeze(RCNN_loss_repbox, 0)

        else:
            RCNN_loss_repgt = torch.zeros(1).cuda()

            RCNN_loss_repbox = torch.zeros(1).cuda()

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, RCNN_loss_repgt, RCNN_loss_repbox, rois_label
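The `_smooth_l1_loss` helper used above is not shown in this listing; the sketch below matches the common definition found in Faster R-CNN ports, with the `sigma` parameter and the reduction dimensions treated as assumptions.

    import torch

    def smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights,
                       bbox_outside_weights, sigma=1.0, dim=(1,)):
        # quadratic for small residuals, linear for large ones
        sigma_2 = sigma ** 2
        box_diff = bbox_inside_weights * (bbox_pred - bbox_targets)
        abs_diff = torch.abs(box_diff)
        flag = (abs_diff < 1.0 / sigma_2).float()
        loss = flag * 0.5 * sigma_2 * box_diff ** 2 \
               + (1.0 - flag) * (abs_diff - 0.5 / sigma_2)
        loss = bbox_outside_weights * loss
        for d in sorted(dim, reverse=True):
            loss = loss.sum(d)
        return loss.mean()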
Example n. 33
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data
        self.batch_size = im_data.size(0)

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # Base feature
        base_feat = self.RCNN_conv_new(base_feat)

        # Local feature with PS-ROIPooling
        # Local classification
        local_cls_feat = self.RCNN_local_cls_base(base_feat)
        local_cls_feat = self.RCNN_psroi_pool_cls(local_cls_feat, rois.view(-1, 5))
        local_cls = self.avg_pooling(local_cls_feat)
        local_cls = self.RCNN_local_cls_fc(local_cls)

        # Local bbox regression
        local_bbox_feat = self.RCNN_local_bbox_base(base_feat)
        local_bbox_feat = self.RCNN_psroi_pool_loc(local_bbox_feat, rois.view(-1, 5))
        local_bbox = self.avg_pooling(local_bbox_feat)

        # Global feature with ROIPooling
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        global_base = self.RCNN_global_base(pooled_feat)
        global_cls = self.RCNN_global_cls(global_base)
        global_bbox = self.RCNN_global_bbox(global_base)

        # fusion global feature and local feature
        cls_score = (local_cls + global_cls).squeeze()
        bbox_pred = (local_bbox + global_bbox).squeeze()

        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        cls_prob = F.softmax(cls_score, dim=1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            loss_func = self.ohem_detect_loss if cfg.TRAIN.OHEM else self.detect_loss
            RCNN_loss_cls, RCNN_loss_bbox = loss_func(cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
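`self.ohem_detect_loss` is referenced above but defined elsewhere; the sketch below shows the underlying idea of online hard example mining over per-ROI losses. The `num_hard` budget and the choice to rank on the classification loss alone are assumptions, not details taken from this listing.

    import torch
    import torch.nn.functional as F

    def ohem_detect_loss_sketch(cls_score, rois_label, bbox_pred, rois_target,
                                rois_inside_ws, rois_outside_ws, num_hard=128):
        # rank ROIs by their unreduced classification loss
        loss_c = F.cross_entropy(cls_score, rois_label, reduction='none')
        k = min(num_hard, loss_c.size(0))
        _, topk_idx = torch.topk(loss_c, k)
        loss_cls = loss_c[topk_idx].mean()
        # smooth L1 bbox loss restricted to the same hard subset
        diff = rois_inside_ws[topk_idx] * (bbox_pred[topk_idx] - rois_target[topk_idx])
        abs_diff = diff.abs()
        per_coord = torch.where(abs_diff < 1.0, 0.5 * diff ** 2, abs_diff - 0.5)
        loss_bbox = (rois_outside_ws[topk_idx] * per_coord).sum(1).mean()
        return loss_cls, loss_bbox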