Code Example #1
    def foward_global_domain_cls(self, base_feat, eta, target):
        if self.gc:
            domain_global, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if not target:
                _, feat = self.netD(base_feat.detach())
            else:
                feat = None
        else:
            domain_global = self.netD(grad_reverse(base_feat, lambd=eta))
            feat = None
        return domain_global, feat  # , diff
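All of the examples on this page call grad_reverse, the gradient reversal layer (GRL) from domain-adversarial training (Ganin & Lempitsky, 2015). Each repo defines it elsewhere; a minimal sketch of the usual pattern, matching the lambd keyword used at the call sites above, might look like this:

import torch

class GradReverse(torch.autograd.Function):
    # Identity in the forward pass; multiplies the incoming gradient by
    # -lambd in the backward pass, so the feature extractor is trained
    # to fool the domain classifier stacked on top of it.
    @staticmethod
    def forward(ctx, x, lambd):
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.neg() * ctx.lambd, None

def grad_reverse(x, lambd=1.0):
    return GradReverse.apply(x, lambd)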
Code Example #2
    def foward_local_domain_cls(self, base_feat, eta, target):

        if self.lc:
            d_pixel, _ = self.netD_pixel(grad_reverse(base_feat, lambd=eta))
            # print(d_pixel.mean())
            if not target:
                _, feat_pixel = self.netD_pixel(base_feat.detach())
            else:
                feat_pixel = None
        else:
            d_pixel = self.netD_pixel(grad_reverse(base_feat, lambd=eta))
            feat_pixel = None
        return d_pixel, feat_pixel
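Examples #1 and #2 unpack the discriminators as (domain score, context feature) when the context flag is on. The discriminator classes are not shown; below is a sketch of a two-output global discriminator consistent with this calling convention (the class name and layer sizes are illustrative, not taken from these repos):

import torch.nn as nn
import torch.nn.functional as F

class NetDGlobal(nn.Module):
    # Domain classifier that also exposes its penultimate activations
    # as a "context" vector for later concatenation with ROI features.
    def __init__(self, in_ch=1024):
        super().__init__()
        self.conv1 = nn.Conv2d(in_ch, 512, 3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(512, 128, 3, stride=2, padding=1)
        self.fc = nn.Linear(128, 2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.avg_pool2d(x, (x.size(2), x.size(3))).view(-1, 128)
        return self.fc(x), x  # (domain logits, context feature)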
Code Example #3
File: faster_rcnn_HTCN.py  Project: Natlem/M-HTCN
    def adv_forward(self, base_feat1, base_feat2, base_feat, pooled_feat, adv_num, eta=1.0):
        if self.lc:
            d_pixel, _ = self.netD_pixels[adv_num](grad_reverse(base_feat1, lambd=eta))
            #print(d_pixel)
            # if not target:
            _, feat_pixel = self.netD_pixels[adv_num](base_feat1.detach())
        else:
            d_pixel = self.netD_pixels[adv_num](grad_reverse(base_feat1, lambd=eta))
            # note: feat_pixel (and feat_mid / feat below) are only set in the
            # context branches, so this forward assumes self.lc and self.gc are True

        if self.gc:
            domain_mid, _ = self.netD_mids[adv_num](grad_reverse(base_feat2, lambd=eta))
            # if not target:
            _, feat_mid = self.netD_mids[adv_num](base_feat2.detach())
        else:
            domain_mid = self.netD_mids[adv_num](grad_reverse(base_feat2, lambd=eta))


        if self.gc:
            domain_p, _ = self.netDs[adv_num](grad_reverse(base_feat, lambd=eta))
            # if target:
            #     return d_pixel,domain_p,domain_mid#, diff
            _, feat = self.netDs[adv_num](base_feat.detach())
        else:
            domain_p = self.netDs[adv_num](grad_reverse(base_feat, lambd=eta))
        #
        feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
        feat_mid = feat_mid.view(1, -1).repeat(pooled_feat.size(0), 1)
        feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
        # concat
        feat = torch.cat((feat_mid, feat), 1)
        feat = torch.cat((feat_pixel, feat), 1)
        #
        feat_random = self.RandomLayers[adv_num]([pooled_feat, feat])
        d_ins = self.netD_das[adv_num](grad_reverse(feat_random, lambd=eta))
        return d_pixel, domain_p, domain_mid, d_ins
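self.RandomLayers[adv_num] (and self.RandomLayer in Example #9) fuses the ROI feature with the concatenated context vector before the instance-level discriminator netD_das. This looks like the randomized multilinear map of CDAN (Long et al., 2018); a sketch under that assumption:

import math
import torch
import torch.nn as nn

class RandomLayer(nn.Module):
    # Fixed random projections whose element-wise product approximates
    # the outer product of the inputs at a constant output_dim cost.
    # In real use the matrices must live on the same device as the inputs.
    def __init__(self, input_dim_list, output_dim=1024):
        super().__init__()
        self.output_dim = output_dim
        self.random_matrix = [torch.randn(d, output_dim) for d in input_dim_list]

    def forward(self, input_list):
        projections = [torch.mm(x, r) for x, r in zip(input_list, self.random_matrix)]
        out = projections[0] / math.pow(self.output_dim, 1.0 / len(projections))
        for p in projections[1:]:
            out = out * p
        return out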
Code Example #4
    def forward(self, x):
        x_base = F.relu(self.bn0(x))
        x_base = F.avg_pool2d(x_base, (x_base.size(2), x_base.size(3)))
        x_base = x_base.view(-1, 1024)
        x = F.relu(self.bn1(self.conv1(grad_reverse(x, lambd=1.0))))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.avg_pool2d(x, (x.size(2), x.size(3)))
        x1 = x.view(-1, 256)
        x2 = self.fc(x1)
        return x_base, x1, x2
Code Example #5
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                target=False,
                eta=1.0):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        if self.context:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return domain_p  # , diff
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return domain_p  #,diff
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        if self.context:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)
        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, domain_p  #,diff
Code Example #6
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                target=False,
                test=False,
                eta=1.0,
                hints=False):
        if test:
            self.training = False  # temporarily force eval-mode control flow
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        if self.dc == 'swda':
            if self.lc:
                d_pixel, _ = self.netD_pixel(
                    grad_reverse(base_feat1, lambd=eta))
                # print(d_pixel)
                if not target:
                    _, feat_pixel = self.netD_pixel(base_feat1.detach())
            else:
                d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
        base_feat = self.RCNN_base2(base_feat1)
        if self.dc == 'vanilla':
            domain = self.netD_dc(grad_reverse(base_feat, lambd=eta))
            if target:
                return None, domain
        elif self.dc == 'swda':
            if self.gc:
                domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
                if target:
                    return d_pixel, domain_p
                _, feat = self.netD(base_feat.detach())
            else:
                domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
                if target:
                    return d_pixel, domain_p
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        #feat_pixel = torch.zeros(feat_pixel.size()).cuda()
        if self.lc:
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
        if self.gc:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)
        if self.conf:
            # confidence
            confidence = torch.sigmoid(self.netD_confidence(pooled_feat))
            # Make sure we don't have any numerical instability
            eps = 1e-12
            pred_original = torch.clamp(cls_prob, 0. + eps, 1. - eps)
            confidence = torch.clamp(confidence, 0. + eps, 1. - eps)
            confidence_loss = (-torch.log(confidence))

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            if self.conf and hints:
                # Randomly set half of the confidences to 1 (i.e. no hints)
                b = torch.bernoulli(
                    torch.Tensor(confidence.size()).uniform_(0, 1)).cuda()
                conf = confidence * b + (1 - b)
                labels_onehot = encode_onehot(rois_label,
                                              pred_original.size(1))
                pred_new = pred_original * conf.expand_as(pred_original) + \
                    labels_onehot * (1 - conf.expand_as(labels_onehot))
                pred_new = torch.log(pred_new)
                RCNN_loss_cls = F.nll_loss(pred_new, rois_label)

            else:
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        if test:
            self.training = True  # restore the training-mode flag
        if self.dc == 'swda' and self.conf is None:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, None, None
        elif self.dc == 'vanilla' and self.conf is None:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, domain, None, None
        elif self.conf and self.dc is None:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, None, confidence_loss, confidence
        elif self.conf and self.dc == "swda":
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, confidence_loss, confidence
        elif self.conf and self.dc == "vanilla":
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, domain, confidence_loss, confidence
        else:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, None, None, None
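encode_onehot is not included in this snippet; for the confidence-hints loss above (in the style of DeVries & Taylor's learned confidence estimation) it only needs to one-hot encode rois_label. A plausible implementation:

import torch

def encode_onehot(labels, n_classes):
    # labels: LongTensor of shape (N,) -> one-hot FloatTensor of (N, n_classes)
    onehot = torch.zeros(labels.size(0), n_classes, device=labels.device)
    return onehot.scatter_(1, labels.view(-1, 1), 1.0)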
Code Example #7
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                target=False,
                eta=1.0):

        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data
        lossQ = -1

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)
        # feed base feature map to RPN to obtain rois
        #print("target is ",target)
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes, target)
        #print("rois.shape:",rois.shape)
        # if it is the training phase, use ground-truth bboxes for refining
        if self.training and not target:
            #print("source traning---------------------------")
            #print("batch_size:",batch_size)
            #print("gt_boxes.shape:",gt_boxes.shape)
            #print("num_boxes:",num_boxes.data)
            # print(self.training)
            # print(not target)
            # print("use ground-truth bboxes for refining")
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            lossQ = -1

        rois = Variable(rois)
        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        #print("pooled_feat before _head_to_tail:",pooled_feat.shape)
        if self.context:
            d_instance, _ = self.netD_pixel(
                grad_reverse(pooled_feat, lambd=eta))
            #if target:
            #d_instance, _ = self.netD_pixel(grad_reverse(pooled_feat, lambd=eta))
            #return d_pixel#, diff
            d_score_total, feat = self.netD_pixel(pooled_feat.detach())
        else:
            d_score_total = self.netD_pixel(pooled_feat.detach())
            d_instance = self.netD_pixel(grad_reverse(pooled_feat, lambd=eta))
            #if target:
            #return d_pixel#,diff

        #d_score_total, _ = self.netD_pixel(pooled_feat.detach())
        #print("d_score_total.shape",d_score_total.shape)
        #print("pooled_feat.shape:",pooled_feat.shape)
        d_instance_q = d_instance.split(128, 0)

        d_score_total_q = d_score_total.split(128, 0)
        d_score_total_qs = []
        for img in range(batch_size):
            temp = torch.mean(d_score_total_q[img], dim=3)
            d_score_total_qs.append(torch.mean(temp, dim=2))

        #d_score_total = torch.mean(d_score_total,dim=3)
        #d_score_total = torch.mean(d_score_total,dim=2)
        pooled_feat = self._head_to_tail(pooled_feat)

        #print("pooled_feat.shape:",pooled_feat.shape)

        if self.training and self.S_agent:
            pooled_feat_s = pooled_feat.split(128, 0)
            for img in range(batch_size):
                pooled_feat_d = pooled_feat_s[img]
                #print("------------------begain selecting in the source-----------------------")
                select_iter = int(pooled_feat_d.shape[0] / self.candidate_num)
                total_index = list(range(0, pooled_feat_d.shape[0]))
                np.random.shuffle(total_index)
                select_index = []
                for episode in range(select_iter):
                    #print("######## begin batch %d, episode %d ########" % (img, episode))
                    select_list = list(range(0, self.candidate_num))
                    batch_idx = total_index[episode *
                                            self.candidate_num:(episode + 1) *
                                            self.candidate_num]
                    state = pooled_feat_d[batch_idx]
                    #print("state.shape:",state.shape)
                    d_score = d_score_total_qs[img][batch_idx]
                    #print("d_score.shape:",d_score.shape)
                    for it in range(self.select_num):
                        #print("#########begain the %d-th selection################" % (it))
                        epsilon = self.epsilon_by_epoch(self.iter_dqn)
                        action_index = self.current_model.act(
                            state, epsilon, select_list)
                        #print("action_index:",action_index)
                        #action_episode.append(action_index)
                        try:
                            select_list.remove(action_index)
                        except ValueError:
                            print("select_list:", select_list)
                            print("action_index:", action_index)
                            print("error: action_index not in select_list")
                            continue
                        #print("the %d-th select, action_index is %d"%(it,action_index))
                        if d_score[action_index] > self.ts:
                            reward = -1
                        else:
                            reward = 1
                        #print("reward:",reward)
                        next_state = state.clone().detach()
                        next_state[action_index] = torch.zeros(
                            1, next_state.shape[1])
                        if it == (self.select_num - 1):
                            done = 1
                        else:
                            done = 0
                        self.replay_buffer.push(state, action_index, reward,
                                                next_state, done, select_list)
                        self.iter_dqn = self.iter_dqn + 1
                        state = next_state
                    select_index = select_index + [
                        batch_idx[i] for i in select_list
                    ]
                if len(self.replay_buffer) > cfg.BATCH_SIZE_DQN:
                    lossQ = DQN.compute_td_loss(self.current_model,
                                                self.target_model,
                                                self.replay_buffer,
                                                cfg.BATCH_SIZE_DQN)
                if np.mod(self.iter_dqn, cfg.replace_target_iter) == 0:
                    DQN.update_target(self.current_model, self.target_model)
                if img == 0:
                    d_instance_refine = d_instance_q[img][select_index]
                else:
                    d_instance_refine = torch.cat(
                        (d_instance_refine, d_instance_q[img][select_index]),
                        0)
        pooled_feat_original = pooled_feat.clone().detach()
        if self.context:
            feat = feat.view(feat.size(0), -1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic and not target:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        #print("pooled_feat.shape in faster_rcnn_global_pixel_instance:",pooled_feat.shape)
        cls_score = self.RCNN_cls_score(pooled_feat)

        cls_prob = F.softmax(cls_score, 1)
        #print("cls_prob is ",cls_prob.shape)

        if self.training and target and self.T_agent:
            pooled_feat_t = pooled_feat_original.split(128, 0)
            for img in range(batch_size):
                pooled_feat_d = pooled_feat_t[img]

                select_iter_T = int(pooled_feat_d.shape[0] /
                                    self.candidate_num)
                #print("select_iter_T:",select_iter_T)
                total_index_T = list(range(0, pooled_feat_d.shape[0]))
                np.random.shuffle(total_index_T)
                #print("gt_label:",gt_label)
                #print("total_index:",len(total_index))
                select_index_T = []
                for episode_T in range(select_iter_T):
                    select_list_T = list(range(0, self.candidate_num))
                    batch_idx_T = total_index_T[episode_T *
                                                self.candidate_num:(episode_T +
                                                                    1) *
                                                self.candidate_num]
                    state_T = pooled_feat_d[batch_idx_T]
                    d_score_T = d_score_total_qs[img][batch_idx_T]
                    #print("label_pre:",label_pre)
                    for it in range(self.select_num):
                        epsilon_T = self.epsilon_by_epoch_T(self.iter_dqn_T)
                        action_index_T = self.current_model_T.act(
                            state_T, epsilon_T, select_list_T)
                        #select_list_T.remove(action_index_T)
                        try:
                            select_list_T.remove(action_index_T)
                        except ValueError:
                            print("select_list_T:", select_list_T)
                            print("action_index:", action_index_T)
                            print("error: action_index_T not in select_list_T")
                            continue
                        #print("label_pre[action_index_T]:",label_pre[action_index_T])
                        #print("torch.eq(gt_label,label_pre[action_index_T]):",torch.eq(gt_label,label_pre[action_index_T]))
                        if d_score_T[action_index_T] > self.tt:
                            reward = 1
                        else:
                            reward = -1
                        #print("D_score:",d_score_T[action_index_T][1],"reward:",reward)
                        next_state_T = state_T.clone().detach()
                        next_state_T[action_index_T] = torch.zeros(
                            1, next_state_T.shape[1])
                        if it == (self.select_num - 1):
                            done = 1
                        else:
                            done = 0
                        self.replay_buffer_T.push(state_T, action_index_T,
                                                  reward, next_state_T, done,
                                                  select_list_T)
                        self.iter_dqn_T = self.iter_dqn_T + 1
                        state_T = next_state_T
                        #print("select_list_T:",select_list_T)
                        #if len(self.replay_buffer_T)>cfg.BATCH_SIZE_DQN:
                        #    lossQ = DQN.compute_td_loss(self.current_model_T,self.target_model_T,self.replay_buffer_T,cfg.BATCH_SIZE_DQN)
                        #if np.mod(self.iter_dqn_T,cfg.replace_target_iter)==0:
                        #    DQN.update_target(self.current_model_T,self.target_model_T)
                    select_index_T = select_index_T + [
                        batch_idx_T[i] for i in select_list_T
                    ]
                if len(self.replay_buffer_T) > cfg.BATCH_SIZE_DQN:
                    lossQ = DQN.compute_td_loss(self.current_model_T,
                                                self.target_model_T,
                                                self.replay_buffer_T,
                                                cfg.BATCH_SIZE_DQN)
                if np.mod(self.iter_dqn_T, cfg.replace_target_iter) == 0:
                    DQN.update_target(self.current_model_T,
                                      self.target_model_T)
                #d_instance = d_instance[select_index_T]
                if img == 0:
                    d_instance_refine = d_instance_q[img][select_index_T]
                else:
                    d_instance_refine = torch.cat(
                        (d_instance_refine, d_instance_q[img][select_index_T]),
                        0)

        if target:
            return d_instance_refine, lossQ

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        if self.S_agent:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_instance_refine, lossQ  #,diff
        else:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_instance, lossQ
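The epsilon-greedy schedules used above (self.epsilon_by_epoch and self.epsilon_by_epoch_T) are defined elsewhere in the repo; a typical exponentially decaying schedule for DQN exploration, with purely illustrative constants, would be:

import math

def make_epsilon_schedule(eps_start=1.0, eps_final=0.01, decay=5000):
    # Returns epsilon as a function of the global DQN step: starts near
    # eps_start (explore) and decays toward eps_final (exploit).
    return lambda step: eps_final + (eps_start - eps_final) * math.exp(-step / decay)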
Code Example #8
    def forward(self,
                im_data,
                im_info,
                im_cls_lb,
                gt_boxes,
                num_boxes,
                target=False,
                eta=1.0):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        if self.lc:
            d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
            # print(d_pixel)
            if not target:
                _, feat_pixel = self.netD_pixel(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
        base_feat = self.RCNN_base2(base_feat1)
        if self.gc:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, domain_p  # , diff
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, domain_p  # ,diff
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)
        # supervise base feature map with category level label
        cls_feat = self.avg_pool(base_feat)
        cls_feat = self.conv_lst(cls_feat).squeeze(-1).squeeze(-1)
        # cls_feat = self.conv_lst(self.bn1(self.avg_pool(base_feat))).squeeze(-1).squeeze(-1)
        category_loss_cls = nn.BCEWithLogitsLoss()(cls_feat, im_cls_lb)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == "align":
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == "pool":
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        # feat_pixel = torch.zeros(feat_pixel.size()).cuda()
        if self.lc:
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
        if self.gc:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view,
                1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4),
            )
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return (
            rois,
            cls_prob,
            bbox_pred,
            category_loss_cls,
            rpn_loss_cls,
            rpn_loss_bbox,
            RCNN_loss_cls,
            RCNN_loss_bbox,
            rois_label,
            d_pixel,
            domain_p,
        )  # ,diff
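The image-level multi-label target im_cls_lb is built outside this snippet; Example #10 constructs an equivalent vector inline from gt_boxes. A sketch for foreground-only labels, assuming conv_lst outputs one logit per foreground class (consistent with the cls_feat_sig[label_i - 1] indexing in Example #11):

import torch

def image_level_labels(gt_boxes, num_fg_classes):
    # gt_boxes: (B, K, 5) with the class index stored in column 4
    labels = torch.zeros(num_fg_classes, device=gt_boxes.device)
    cls_inds = torch.unique(gt_boxes[:, :, 4]).long()
    labels[cls_inds[cls_inds > 0] - 1] = 1.0  # shift out background (index 0)
    return labels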
Code Example #9
    def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        if self.lc:
            d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
            #print(d_pixel)
            # if not target:
            _, feat_pixel = self.netD_pixel(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))

        if self.la_attention:
            base_feat1 = local_attention(base_feat1, d_pixel.detach())

        base_feat2 = self.RCNN_base2(base_feat1)
        if self.gc:
            domain_mid, _ = self.netD_mid(grad_reverse(base_feat2, lambd=eta))
            # if not target:
            _, feat_mid = self.netD_mid(base_feat2.detach())
        else:
            domain_mid = self.netD_mid(grad_reverse(base_feat2, lambd=eta))

        if self.mid_attention:
            base_feat2 = middle_attention(base_feat2, domain_mid.detach())

        base_feat = self.RCNN_base3(base_feat2)
        if self.gc:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            # if target:
            #     return d_pixel,domain_p,domain_mid#, diff
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))

            # if target:
            #     return d_pixel,domain_p,domain_mid#,diff
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        #feat_pixel = torch.zeros(feat_pixel.size()).cuda()
        #
        feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
        feat_mid = feat_mid.view(1, -1).repeat(pooled_feat.size(0), 1)
        feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
        # concat
        feat = torch.cat((feat_mid, feat), 1)
        feat = torch.cat((feat_pixel, feat), 1)
        #
        feat_random = self.RandomLayer([pooled_feat, feat])

        d_ins = self.netD_da(grad_reverse(feat_random, lambd=eta))

        if target:
            return d_pixel, domain_p, domain_mid, d_ins

        pooled_feat = torch.cat((feat, pooled_feat), 1)
        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, domain_mid, d_ins  # , diff
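local_attention and middle_attention are HTCN's context-aware re-weighting helpers and are not defined here. They weight features by the uncertainty of the matching discriminator; a sketch under that assumption, using the binary entropy of the per-pixel domain score (middle_attention would be the analogous function over domain_mid):

import torch

def local_attention(features, d):
    # d: per-pixel domain probability from netD_pixel; regions where the
    # discriminator is uncertain (high entropy) are emphasized.
    d = d.clamp(1e-6, 1.0 - 1e-6)
    entropy = -(d * torch.log(d) + (1.0 - d) * torch.log(1.0 - d))
    return features * (1.0 + entropy)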
Code Example #10
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                target=False,
                eta=1.0):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # get all vector of class for label
        if self.training and target:
            cls_label_ind = torch.unique(gt_boxes[:, :, 4].cpu())
            cls_label = torch.zeros(self.n_classes)
            cls_label[cls_label_ind.long()] = 1
            # assume the background category is always present
            cls_label[0] = 1
            cls_label = cls_label.cuda()
            cls_label.requires_grad = False

        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        if self.lc:
            d_pixel, _ = self.netD_pixel_1(grad_reverse(base_feat1, lambd=eta))
            # print(d_pixel)
            if not target:
                _, feat_pixel = self.netD_pixel_1(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel_1(grad_reverse(base_feat1, lambd=eta))

        base_feat2 = self.RCNN_base2(base_feat1)
        if self.lc:
            d_pixel_2, _ = self.netD_pixel_2(
                grad_reverse(base_feat2, lambd=eta))
        else:
            d_pixel_2 = self.netD_pixel_2(grad_reverse(base_feat2, lambd=eta))

        base_feat3 = self.RCNN_base3(base_feat2)
        if self.lc:
            d_pixel_3, _ = self.netD_pixel_3(
                grad_reverse(base_feat3, lambd=eta))
        else:
            d_pixel_3 = self.netD_pixel_3(grad_reverse(base_feat3, lambd=eta))
            # print(d_pixel_3.mean())

        base_feat4 = self.RCNN_base4(base_feat3)
        if self.gc:
            d_pixel_4, _ = self.netD_1(grad_reverse(base_feat4, lambd=eta))
        else:
            d_pixel_4 = self.netD_1(grad_reverse(base_feat4, lambd=eta))

        # something wrong
        base_feat = self.RCNN_base5(base_feat4)
        # for target domain training, we need to return the d_pixel, domain_p
        if self.gc:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training and not target:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        #feat_pixel = torch.zeros(feat_pixel.size()).cuda()
        if self.lc:
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
        if self.gc:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic and not target:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        # weakly-supervised image-level loss over the max ROI score (disabled)
        if False:
            #cls_prob_sum = torch.sum(cls_prob, 0)
            # x = max(1, x)
            #cls_prob_sum = cls_prob_sum.repeat(2, 1)
            #cls_prob_sum = torch.min(cls_prob_sum, 0)[0]
            max_roi_cls_prob = torch.max(cls_prob, 0)[0]
            #assert (max_roi_cls_prob.data.cpu().numpy().all() >= 0. and max_roi_cls_prob.data.cpu().numpy().all() <= 1.)
            if not (max_roi_cls_prob.data.cpu().numpy().all() >= 0.
                    and max_roi_cls_prob.data.cpu().numpy().all() <= 1.):
                pdb.set_trace()
            if not (cls_label.data.cpu().numpy().all() >= 0.
                    and cls_label.data.cpu().numpy().all() <= 1.):
                pdb.set_trace()
            BCE_loss = F.binary_cross_entropy(max_roi_cls_prob, cls_label)
            return d_pixel, domain_p, BCE_loss

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        # for weakly detection, concentrate the cls_score and calculate the loss

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p  # ,diff
Code Example #11
File: faster_rcnn.py  Project: zyg11/CR-DA-DET
    def forward(
        self,
        im_data,
        im_info,
        im_cls_lb,
        gt_boxes,
        num_boxes,
        target=False,
        eta=1.0,
        weight_value=1.0,
    ):
        if target:
            need_backprop = torch.Tensor([0]).cuda()
            self.RCNN_rpn.eval()
        else:
            need_backprop = torch.Tensor([1]).cuda()
            self.RCNN_rpn.train()

        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        if self.lc:
            d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
            # print(d_pixel)
            # if not target:
            _, feat_pixel = self.netD_pixel(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
        base_feat = self.RCNN_base2(base_feat1)
        if self.gc:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            # if target:
            #     return d_pixel,domain_p#, diff
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            # if target:
            #     return d_pixel,domain_p#,diff
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)
        # supervise base feature map with category level label
        cls_feat = self.avg_pool(base_feat)
        cls_feat = self.conv_lst(cls_feat).squeeze(-1).squeeze(-1)
        # cls_feat = self.conv_lst(self.bn1(self.avg_pool(base_feat))).squeeze(-1).squeeze(-1)
        category_loss_cls = nn.BCEWithLogitsLoss()(cls_feat, im_cls_lb)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == "align":
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == "pool":
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        instance_pooled_feat = pooled_feat
        # feat_pixel = torch.zeros(feat_pixel.size()).cuda()
        if self.lc:
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
            if self.da_use_contex:
                instance_pooled_feat = torch.cat(
                    (feat_pixel.detach(), instance_pooled_feat), 1)
        if self.gc:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)
            if self.da_use_contex:
                instance_pooled_feat = torch.cat(
                    (feat.detach(), instance_pooled_feat), 1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        # add instance da
        instance_sigmoid, same_size_label = self.RCNN_instanceDA(
            instance_pooled_feat, need_backprop)

        if target:
            cls_pre_label = cls_prob.argmax(1).detach()
            cls_feat_sig = torch.sigmoid(cls_feat[0]).detach()
            target_weight = []
            for i in range(len(cls_pre_label)):
                label_i = cls_pre_label[i].item()
                if label_i > 0:
                    diff_value = torch.exp(
                        weight_value * torch.abs(cls_feat_sig[label_i - 1] -
                                                 cls_prob[i][label_i])).item()
                    target_weight.append(diff_value)
                else:
                    target_weight.append(1.0)

            instance_loss = nn.BCELoss(
                weight=torch.Tensor(target_weight).view(-1, 1).cuda())
        else:
            instance_loss = nn.BCELoss()
        DA_ins_loss_cls = instance_loss(instance_sigmoid, same_size_label)

        if target:
            return d_pixel, domain_p, DA_ins_loss_cls

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view,
                1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4),
            )
            bbox_pred = bbox_pred_select.squeeze(1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return (
            rois,
            cls_prob,
            bbox_pred,
            category_loss_cls,
            rpn_loss_cls,
            rpn_loss_bbox,
            RCNN_loss_cls,
            RCNN_loss_bbox,
            rois_label,
            d_pixel,
            domain_p,
            DA_ins_loss_cls,
        )
Code Example #12
    def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        #-------------------------------------------------------------
        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        # print('\nbase_feat1: ', base_feat1.shape) #torch.Size([1, 256, 150, 184])


        domain_p1 = self.netD1(grad_reverse(base_feat1, lambd=eta))  # level-1 domain map; its entropy re-weights base_feat1 (single attention)
        # print('\ncam_logit_p1: ', cam_logit_p1.shape) #torch.Size([1, 2])
        domain_p1_en = prob2entropy2(domain_p1)
        base_feat1 = base_feat1 * domain_p1_en
        # print('\nbase_feat1 af: ', base_feat1.shape) #torch.Size([1, 256, 150, 238])
        # print('\natt_map: ', att_map.shape)
        # print('\ndomain_p1: ', domain_p1.shape) # torch.Size([1, 1, 150, 200]) 
        # base_feat1 = base_feat1 * att_map_256 # atten 1
        # print('\n att base_feat1 map: ', base_feat1.shape)

        feat1 = self.netD_forward1(base_feat1.detach())  # add attention module! # test no .detach()
        # base_feat1.detach(): gradients from self.netD_forward1() only update
        # its own parameters, not the layers that produced base_feat1. Example:
        # def forward(self, x):
        #     x = self.net1(x)
        #     return self.net2(x.detach())  # training updates net2 only, not net1

        feat1_p = F.softmax(feat1, 1)
        feat1_en = prob2entropy(feat1_p)
        feat1 = feat1 * feat1_en
        # feat1 = self.netD_forward1(base_feat1) 
        
        # feat1 = feat1 * att_map # atten 2

        # print('\nfeat1: ', feat1.shape) # torch.Size([1, 128, 1, 1])

        # domain_p12, _ = self.netD21(grad_reverse(base_feat1, lambd=eta)) # cuda our of memory
        # base_feat1 = base_feat1 * att_map_256 # atten 1 DON'T WORK!!
        #----------------------------------------------------------------
        base_feat2 = self.RCNN_base2(base_feat1)

        domain_p2 = self.netD2(grad_reverse(base_feat2, lambd=eta))
        # print('\ndomain_p2: ', domain_p2.shape) #torch.Size([1, 2])
        # base_feat2 = base_feat2 * att_map_512
        feat2 = self.netD_forward2(base_feat2.detach())
       

        feat2_p = self.fc2(feat2.view(-1, 128)) # nn.Linear(128,2)
        feat2 = global_attention(feat2, feat2_p)
        # feat2 = self.netD_forward2(base_feat2)
        # feat2 = feat2 * att_map_128

        # print('\nbase_feat2: ', base_feat2.shape) #torch.Size([1, 512, 75, 92]
        # print('\ncam_logit_p2: ', cam_logit_p2.shape) # torch.Size([1, 2])

        # print('\nfeat2: ', feat2.shape)  #torch.Size([1, 128, 1, 1])

        # domain_p2_sig, _ = self.netD12(grad_reverse(base_feat2, lambd=eta))
        # base_feat2 = base_feat2 * att_map_512
        #----------------------------------------------------------------

        base_feat = self.RCNN_base3(base_feat2)

        domain_p3 = self.netD3(grad_reverse(base_feat, lambd=eta))
        # print('\ndomain_p3: ', domain_p3.shape) #torch.Size([1, 2])
        # print('\nbase_feat: ', base_feat.shape) #torch.Size([1, 1024, 38, 46])

        # print('\ncam_logit_p3: ', cam_logit_p3.shape) #torch.Size([1, 2])


        # base_feat = base_feat * att_map_1024
        feat3 = self.netD_forward3(base_feat.detach())
        feat3_p = self.fc3(feat3.view(-1, 128))
        feat3 = global_attention(feat3, feat3_p)

        # feat3_en = prob2entropy(F.sigmoid(feat3))
        # feat3 = feat3 * feat3_en

        # feat3 = self.netD_forward3(base_feat)
        # print('\nfeat3: ', feat3.shape) # torch.Size([1, 128, 1, 1])


        # feat3 = feat3 * att_map_128
        # domain_p3_sig, _ = self.netD13(grad_reverse(base_feat, lambd=eta))
        # base_feat = base_feat * att_map_1024

        #----------------------------------------------------------------

        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)

        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))

        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        feat1 = feat1.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat1, pooled_feat), 1)

        feat2 = feat2.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat2, pooled_feat), 1)

        
        feat3 = feat3.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat3, pooled_feat), 1)

        #---------------------------------------------------------------
        d_inst = self.netD_inst(grad_reverse(pooled_feat, lambd=eta))  # TODO: add entropy weighting here?
        #---------------------------------------------------------------
        # print('\nd_inst: ', d_inst.shape) #torch.Size([128, 2])
        #---
        # add entropy loss here
        #---
        if target:
            return d_inst, domain_p1, domain_p2, domain_p3, \
                feat1_p, feat2_p, feat3_p
                # cam_logit_p1, cam_logit_p2, cam_logit_p3
                # domain_p12, domain_p2_sig, domain_p3_sig

        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
                RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
                d_inst, domain_p1, domain_p2, domain_p3, \
                feat1_p, feat2_p, feat3_p
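prob2entropy, prob2entropy2, and global_attention are this repo's entropy-attention helpers and are not included in the snippet. Following the ADVENT-style entropy weighting that the surrounding comments hint at, a sketch might be:

import torch
import torch.nn.functional as F

def prob2entropy(prob, eps=1e-12):
    # element-wise entropy term of a probability map
    return -prob * torch.log2(prob + eps)

def prob2entropy2(p, eps=1e-12):
    # binary entropy of a per-pixel domain probability map
    p = p.clamp(eps, 1.0 - eps)
    return -(p * torch.log2(p) + (1.0 - p) * torch.log2(1.0 - p))

def global_attention(feat, logits):
    # re-weight a (N, C, 1, 1) context feature by the entropy of its
    # own two-way domain prediction
    p = F.softmax(logits, dim=1)
    ent = prob2entropy(p).sum(dim=1, keepdim=True)  # (N, 1)
    return feat * ent.view(-1, 1, 1, 1)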