def _PyramidRoI_Feat(self, feat_maps, rois, im_info):
    '''RoI pool on pyramid feature maps.'''
    # do roi pooling based on predicted rois
    img_area = im_info[0][0] * im_info[0][1]
    h = rois.data[:, 4] - rois.data[:, 2] + 1
    w = rois.data[:, 3] - rois.data[:, 1] + 1
    # assign each RoI to a pyramid level: k = floor(4 + log2(sqrt(w * h) / 224)), clamped to [2, 5]
    roi_level = torch.log(torch.sqrt(h * w) / 224.0) / np.log(2)
    roi_level = torch.floor(roi_level + 4)
    # --------
    # roi_level = torch.log(torch.sqrt(h * w) / 224.0)
    # roi_level = torch.round(roi_level + 4)
    # --------
    roi_level[roi_level < 2] = 2
    roi_level[roi_level > 5] = 5
    # roi_level.fill_(5)

    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        # NOTE: need to add pyramid
        grid_xy = _affine_grid_gen(rois, feat_maps.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        roi_pool_feat = self.RCNN_roi_crop(feat_maps, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            roi_pool_feat = F.max_pool2d(roi_pool_feat, 2, 2)

    elif cfg.POOLING_MODE == 'align':
        roi_pool_feats = []
        box_to_levels = []
        for i, l in enumerate(range(2, 6)):
            if (roi_level == l).sum() == 0:
                continue
            idx_l = (roi_level == l).nonzero().squeeze()
            box_to_levels.append(idx_l)
            scale = feat_maps[i].size(2) / im_info[0][0]
            feat = self.RCNN_roi_align(feat_maps[i], rois[idx_l], scale)
            roi_pool_feats.append(feat)
        roi_pool_feat = torch.cat(roi_pool_feats, 0)
        box_to_level = torch.cat(box_to_levels, 0)
        # restore the original RoI order after per-level pooling
        idx_sorted, order = torch.sort(box_to_level)
        roi_pool_feat = roi_pool_feat[order]

    elif cfg.POOLING_MODE == 'pool':
        roi_pool_feats = []
        box_to_levels = []
        for i, l in enumerate(range(2, 6)):
            if (roi_level == l).sum() == 0:
                continue
            idx_l = (roi_level == l).nonzero().squeeze()
            box_to_levels.append(idx_l)
            scale = feat_maps[i].size(2) / im_info[0][0]
            feat = self.RCNN_roi_pool(feat_maps[i], rois[idx_l], scale)
            roi_pool_feats.append(feat)
        roi_pool_feat = torch.cat(roi_pool_feats, 0)
        box_to_level = torch.cat(box_to_levels, 0)
        idx_sorted, order = torch.sort(box_to_level)
        roi_pool_feat = roi_pool_feat[order]

    return roi_pool_feat
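# --- Worked example: FPN level assignment ---
# A minimal, self-contained sketch of the level-assignment rule used in
# _PyramidRoI_Feat above: a 224x224 RoI maps to P4, and each halving of scale
# drops one pyramid level, clamped to [P2, P5]. The boxes below are
# illustrative, not taken from the original code.
import numpy as np
import torch

def assign_pyramid_level(boxes, canonical_size=224.0, canonical_level=4,
                         min_level=2, max_level=5):
    # boxes: (N, 4) tensor of (x1, y1, x2, y2)
    h = boxes[:, 3] - boxes[:, 1] + 1
    w = boxes[:, 2] - boxes[:, 0] + 1
    level = torch.floor(
        canonical_level + torch.log(torch.sqrt(h * w) / canonical_size) / np.log(2))
    return level.clamp(min=min_level, max=max_level).long()

boxes = torch.tensor([[0., 0., 31., 31.],     # small RoI  -> P2 (clamped)
                      [0., 0., 223., 223.],   # canonical  -> P4
                      [0., 0., 899., 899.]])  # large RoI  -> P5 (clamped)
print(assign_pyramid_level(boxes))  # tensor([2, 4, 5])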
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())

        # TODO
        # map each sub-class label to its main-class label
        rois_main_label = Variable(rois_label.view(-1).long())
        rois_sub_class = list(
            map(lambda x: self.sub_classes[x], rois_main_label))
        rois_main_class = list(
            map(lambda x: sub2main_dict[x], rois_sub_class))
        rois_main_label = list(
            map(lambda x: self.main_classes.index(x), rois_main_class))
        rois_main_label = torch.cuda.LongTensor(rois_main_label)
        rois_main_label = Variable(rois_main_label)

        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(
            rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(
            rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_main_label = None
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0
        # return roi_data

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:],
                                   self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pspool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to the two top models (main and sub branches)
    pooled_feat_main = self._head_to_tail_main(pooled_feat)
    pooled_feat_sub = self._head_to_tail_sub(pooled_feat)

    # compute bbox offset
    bbox_pred_main = self.RCNN_bbox_pred_main(pooled_feat_main)
    bbox_pred_sub = self.RCNN_bbox_pred_sub(pooled_feat_sub)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view_main = bbox_pred_main.view(
            bbox_pred_main.size(0), int(bbox_pred_main.size(1) / 4), 4)
        bbox_pred_select_main = torch.gather(
            bbox_pred_view_main, 1,
            rois_main_label.view(rois_main_label.size(0), 1, 1).expand(
                rois_main_label.size(0), 1, 4))
        bbox_pred_main = bbox_pred_select_main.squeeze(1)

        bbox_pred_view_sub = bbox_pred_sub.view(
            bbox_pred_sub.size(0), int(bbox_pred_sub.size(1) / 4), 4)
        bbox_pred_select_sub = torch.gather(
            bbox_pred_view_sub, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(
                rois_label.size(0), 1, 4))
        bbox_pred_sub = bbox_pred_select_sub.squeeze(1)

    # compute object classification probability
    cls_score_main = self.RCNN_cls_score_main(pooled_feat_main)
    cls_prob_main = F.softmax(cls_score_main, 1)
    cls_score_sub = self.RCNN_cls_score_sub(pooled_feat_sub)
    cls_prob_sub = F.softmax(cls_score_sub, 1)

    RCNN_loss_cls_main = 0
    RCNN_loss_bbox_main = 0
    RCNN_loss_cls_sub = 0
    RCNN_loss_bbox_sub = 0

    if self.training:
        # classification loss
        RCNN_loss_cls_main = F.cross_entropy(cls_score_main, rois_main_label)
        # TODO roi_label should
        RCNN_loss_cls_sub = F.cross_entropy(cls_score_sub, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox_main = _smooth_l1_loss(bbox_pred_main, rois_target,
                                              rois_inside_ws, rois_outside_ws)
        # note: use the sub branch's prediction here (was bbox_pred_main, a copy-paste slip)
        RCNN_loss_bbox_sub = _smooth_l1_loss(bbox_pred_sub, rois_target,
                                             rois_inside_ws, rois_outside_ws)

    cls_prob_main = cls_prob_main.view(batch_size, rois.size(1), -1)
    bbox_pred_main = bbox_pred_main.view(batch_size, rois.size(1), -1)
    cls_prob_sub = cls_prob_sub.view(batch_size, rois.size(1), -1)
    bbox_pred_sub = bbox_pred_sub.view(batch_size, rois.size(1), -1)

    if self.training:
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls_main = torch.unsqueeze(RCNN_loss_cls_main, 0)
        RCNN_loss_bbox_main = torch.unsqueeze(RCNN_loss_bbox_main, 0)
        RCNN_loss_cls_sub = torch.unsqueeze(RCNN_loss_cls_sub, 0)
        RCNN_loss_bbox_sub = torch.unsqueeze(RCNN_loss_bbox_sub, 0)

    return rois, cls_prob_main, bbox_pred_main, cls_prob_sub, bbox_pred_sub, \
        rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls_sub, RCNN_loss_bbox_sub, \
        RCNN_loss_cls_main, RCNN_loss_bbox_main, rois_label
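# --- Worked example: sub-class -> main-class label mapping ---
# A toy illustration of the label re-mapping performed in the training branch
# above. The class lists and sub2main_dict here are invented for the example;
# the real ones come from the dataset definition.
sub_classes = ['__background__', 'sedan', 'suv', 'city_bus', 'coach_bus']
main_classes = ['__background__', 'car', 'bus']
sub2main_dict = {'__background__': '__background__',
                 'sedan': 'car', 'suv': 'car',
                 'city_bus': 'bus', 'coach_bus': 'bus'}

rois_label = [1, 3, 4, 2]  # sub-class indices assigned to four RoIs
rois_sub_class = [sub_classes[x] for x in rois_label]               # ['sedan', 'city_bus', 'coach_bus', 'suv']
rois_main_class = [sub2main_dict[x] for x in rois_sub_class]        # ['car', 'bus', 'bus', 'car']
rois_main_label = [main_classes.index(x) for x in rois_main_class]  # [1, 2, 2, 1]
print(rois_main_label)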
def forward(self, im_data, im_info, gt_boxes, num_boxes, use_gt_boxes=False):
    batch_size = im_data.size(0)

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base_model(im_data)

    if not use_gt_boxes:
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info.data, gt_boxes.data, num_boxes.data)
    else:
        # otherwise use ground truth boxes as the outputs of RCNN_rpn
        rois = gt_boxes.data.clone()
        rois[0, :, 0] = 0
        rois[0, :, 1:] = gt_boxes.data[0, :, :4]
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    if not self.training:
        if batch_size == 1:
            valid = rois.sum(2).view(-1).nonzero().view(-1)
            rois = rois[:, valid, :]

    rpn_loss = rpn_loss_cls + rpn_loss_bbox

    # if it is the training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes.data)
        # rois, rois_obj_label, rois_att_label, \
        #     rois_target, rois_inside_ws, rois_outside_ws = roi_data
        # rois_obj_label = Variable(rois_obj_label.view(-1))
        # rois_att_label = Variable(rois_att_label.view(-1, self.n_att_classes))
        # rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        # rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        # rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))

        roi_data_msdn = self.RCNN_proposal_target_msdn(rois, gt_boxes.data)
        rois, roi_rel_pairs, roi_pair_proposals, rois_obj_label, rois_att_label, rois_rel_label, \
            rois_target, rois_inside_ws, rois_outside_ws = roi_data_msdn

        rois_obj_label = Variable(rois_obj_label.view(-1))
        rois_att_label = Variable(rois_att_label.view(-1, self.n_att_classes))
        rois_rel_label = Variable(rois_rel_label.view(-1))
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))

        roi_pair_proposals = roi_pair_proposals.long()
        roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
        ind_subject = roi_pair_proposals_v[:, 0]
        ind_object = roi_pair_proposals_v[:, 1]
    else:
        rois_obj_label = None
        rois_att_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

        rois_out = []
        roi_rel_pairs_out = []
        roi_pair_proposals_out = []
        for i in range(rois.size(0)):
            rois, roi_rel_pairs, roi_pair_proposals = self._setup_connection(rois[i])
            rois_out.append(rois)
            roi_rel_pairs_out.append(roi_rel_pairs)
            roi_pair_proposals_out.append(roi_pair_proposals)
        rois = torch.stack(rois_out, 0)
        roi_rel_pairs = torch.stack(roi_rel_pairs_out, 0)
        roi_pair_proposals = torch.stack(roi_pair_proposals_out, 0)

        roi_pair_proposals = roi_pair_proposals.long()
        roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
        ind_subject = roi_pair_proposals_v[:, 0]
        ind_object = roi_pair_proposals_v[:, 1]

    rois = Variable(rois)

    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    x_obj = self._head_to_tail(pooled_feat)  # (B x N) x D

    # compute object classification probability
    obj_cls_score = self.RCNN_obj_cls_score(x_obj)
    obj_cls_prob = F.softmax(obj_cls_score, dim=1)
    bbox_pred = self.RCNN_bbox_pred(x_obj)

    if cfg.HAS_ATTRIBUTES:
        x_att = self._head_to_tail_att(pooled_feat)  # (B x N) x D
        att_cls_score = self.RCNN_att_cls_score(x_att)
        att_cls_prob = F.softmax(att_cls_score, dim=1)
        att_cls_log_prob = F.log_softmax(att_cls_score, dim=1)

    if cfg.HAS_RELATIONS:
        # feed base feature map to RPN to obtain rois
        # x_view = x.view(rois.size(0), rois.size(1), x.size(1))
        # rel_feats = obj_cls_score.view(rois.size(0), rois.size(1), obj_cls_score.size(1))
        # roi_rel_pairs, roi_pair_proposals, roi_rel_pairs_score, relpn_loss_cls = \
        #     self.RELPN_rpn(rois.data, rel_feats, im_info.data, gt_boxes.data, num_boxes.data, use_gt_boxes)
        # relpn_loss = relpn_loss_cls
        # size_per_batch = x_obj.size(0) / batch_size
        # roi_pair_proposals = roi_pair_proposals \
        #     + torch.arange(0, batch_size).view(batch_size, 1, 1).type_as(roi_pair_proposals) * size_per_batch
        # roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
        # ind_subject = roi_pair_proposals_v[:, 0]
        # ind_object = roi_pair_proposals_v[:, 1]

        # if self.training:
        #     roi_pair_data = self.RELPN_proposal_target(roi_rel_pairs, gt_boxes.data, num_boxes.data)
        #     # pdb.set_trace()
        #     roi_rel_pairs, rois_rel_label, roi_pair_keep = roi_pair_data
        #     rois_rel_label = Variable(rois_rel_label.view(-1))
        #     roi_pair_keep = roi_pair_keep + torch.arange(0, roi_pair_keep.size(0)).view(roi_pair_keep.size(0), 1).cuda() \
        #         * roi_pair_proposals_v.size(0) / batch_size
        #     roi_pair_keep = roi_pair_keep.view(-1).long()
        #     ind_subject = roi_pair_proposals_v[roi_pair_keep][:, 0]
        #     ind_object = roi_pair_proposals_v[roi_pair_keep][:, 1]

        rois_pred = combine_box_pairs(roi_rel_pairs.view(-1, 9))
        rois_pred = Variable(rois_pred)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois_pred.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled_pred_feat = self.RELPN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                # pool the pair feature (was pooled_feat, a copy-paste slip)
                pooled_pred_feat = F.max_pool2d(pooled_pred_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_pred_feat = self.RELPN_roi_align(base_feat, rois_pred.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_pred_feat = self.RELPN_roi_pool(base_feat, rois_pred.view(-1, 5))

        # combine subject, object and relation features together
        x_pred = self._head_to_tail_rel(pooled_pred_feat)
        x_rel = x_pred  # torch.cat((x_sobj, x_pred, x_oobj), 1)

        # compute relation classification probability
        rel_cls_score = self.RCNN_rel_cls_score(x_rel)
        rel_cls_prob = F.softmax(rel_cls_score, dim=1)

    if cfg.GCN_ON_FEATS and cfg.GCN_LAYERS > 0:
        if cfg.GCN_HAS_ATTENTION:
            x_sobj = obj_cls_score[ind_subject]
            x_oobj = obj_cls_score[ind_object]
            attend_score = self.GRCNN_gcn_att1(x_sobj, x_oobj)  # N_rel x 1
            attend_score = attend_score.view(1, x_pred.size(0))
        else:
            attend_score = Variable(x_rel.data.new(1, x_pred.size(0)).fill_(1))

        # compute the initial maps, including map_obj_att, map_obj_obj and map_obj_rel
        # NOTE we have two ways to compute the map among objects: one is based on the overlaps among object rois.
        # NOTE the intuition behind this is that rois with overlaps should share some common features, so we need to
        # NOTE exclude one roi feature from another.
        # NOTE the other is based on the classification scores. The intuition is that objects have some common
        # co-occurrence patterns, e.g. buses appear more frequently on the road.
        assert x_obj.size() == x_att.size(), \
            "the numbers of object features and attribute features should be the same"
        size_per_batch = x_obj.size(0) // batch_size

        map_obj_att = torch.eye(x_obj.size(0)).type_as(x_obj.data)
        if cfg.MUTE_ATTRIBUTES:
            map_obj_att.zero_()
            x_att = x_att.detach()
        map_obj_att = Variable(map_obj_att)

        map_obj_obj = x_obj.data.new(x_obj.size(0), x_obj.size(0)).fill_(0.0)
        eye_mat = torch.eye(size_per_batch).type_as(x_obj.data)
        for i in range(batch_size):
            # fully connect objects within the same image, excluding self-loops
            map_obj_obj[i * size_per_batch:(i + 1) * size_per_batch,
                        i * size_per_batch:(i + 1) * size_per_batch].fill_(1.0)
            map_obj_obj[i * size_per_batch:(i + 1) * size_per_batch,
                        i * size_per_batch:(i + 1) * size_per_batch] = \
                map_obj_obj[i * size_per_batch:(i + 1) * size_per_batch,
                            i * size_per_batch:(i + 1) * size_per_batch] - eye_mat
        map_obj_obj = Variable(map_obj_obj)

        # connect each relation to its subject and object nodes, weighted by attention
        map_sobj_rel = Variable(x_obj.data.new(x_obj.size(0), x_rel.size(0)).zero_())
        map_sobj_rel.scatter_(
            0, Variable(ind_subject.contiguous().view(1, x_rel.size(0))), attend_score)
        map_oobj_rel = Variable(x_obj.data.new(x_obj.size(0), x_rel.size(0)).zero_())
        map_oobj_rel.scatter_(
            0, Variable(ind_object.contiguous().view(1, x_rel.size(0))), attend_score)
        map_obj_rel = torch.stack((map_sobj_rel, map_oobj_rel), 1)

        if cfg.MUTE_RELATIONS:
            map_obj_rel.data.zero_()
            x_rel = x_rel.detach()

        mat_phrase = Variable(torch.stack((ind_subject, ind_object), 1))
        # map_obj_rel = Variable(map_obj_rel)

        # x_obj = F.relu(self.fc4obj(x_obj))
        # x_att = F.relu(self.fc4att(x_att))
        # x_pred = F.relu(self.fc4rel(x_pred))

        for i in range(cfg.GCN_LAYERS):
            # pass graph representation to gcn
            x_obj, x_rel = self.imp(x_obj, x_rel, map_obj_rel, mat_phrase)
            # pdb.set_trace()

        # compute object classification loss
        obj_cls_score = self.RCNN_obj_cls_score(x_obj)
        obj_cls_prob = F.softmax(obj_cls_score, dim=1)

        # compute attribute classification loss
        att_cls_score = self.RCNN_att_cls_score(x_att)
        att_cls_prob = F.softmax(att_cls_score, dim=1)
        att_cls_log_prob = F.log_softmax(att_cls_score, dim=1)

        # compute relation classification loss
        # x_sobj = x_obj[ind_subject]
        # x_oobj = x_obj[ind_object]
        x_rel = x_pred  # torch.cat((x_sobj, x_pred, x_oobj), 1)
        rel_cls_score = self.RCNN_rel_cls_score(x_rel)
        rel_cls_prob = F.softmax(rel_cls_score, dim=1)

    self.RCNN_loss_bbox = 0
    self.RCNN_loss_obj_cls = 0
    self.RCNN_loss_att_cls = 0
    self.RCNN_loss_rel_cls = 0

    if self.training:
        self.fg_cnt = torch.sum(rois_obj_label.data.ne(0))
        self.bg_cnt = rois_obj_label.data.numel() - self.fg_cnt

        self.RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                              rois_inside_ws, rois_outside_ws)

        # object classification loss
        obj_label = rois_obj_label.long()
        self.RCNN_loss_obj_cls = F.cross_entropy(obj_cls_score, obj_label)

        # attribute classification loss
        if cfg.HAS_ATTRIBUTES:
            att_label = rois_att_label
            att_label = att_label[rois_obj_label.data.nonzero().squeeze()]
            # att_cls_score = att_cls_score[rois_obj_label.data.nonzero().squeeze()]
            # self.RCNN_loss_att_cls = F.multilabel_soft_margin_loss(att_cls_score, att_label)
            att_cls_log_prob = att_cls_log_prob[rois_obj_label.data.nonzero().squeeze()]
            self.RCNN_loss_att_cls = _softmax_with_loss(att_cls_log_prob, att_label)

        # relation classification loss
        if cfg.HAS_RELATIONS:
            self.rel_fg_cnt = torch.sum(rois_rel_label.data.ne(0))
            self.rel_bg_cnt = rois_rel_label.data.numel() - self.rel_fg_cnt
            # ce_weights = rel_cls_score.data.new(rel_cls_score.size(1)).fill_(1)
            # ce_weights[0] = float(self.rel_bg_cnt) / (rois_rel_label.data.numel() + 1e-5)
            # ce_weights = ce_weights
            rel_label = rois_rel_label.long()
            self.RCNN_loss_rel_cls = F.cross_entropy(rel_cls_score, rel_label)

    rcnn_loss = self.RCNN_loss_bbox + self.RCNN_loss_obj_cls
    if cfg.HAS_ATTRIBUTES and not cfg.MUTE_ATTRIBUTES:
        rcnn_loss += cfg.WEIGHT_ATTRIBUTES * self.RCNN_loss_att_cls
    if cfg.HAS_RELATIONS and not cfg.MUTE_RELATIONS:
        rcnn_loss += cfg.WEIGHT_RELATIONS * self.RCNN_loss_rel_cls

    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
    obj_cls_prob = obj_cls_prob.view(batch_size, rois.size(1), -1)
    att_cls_prob = None if not cfg.HAS_ATTRIBUTES else att_cls_prob.view(
        batch_size, rois.size(1), -1)
    rel_cls_prob = None if not cfg.HAS_RELATIONS else rel_cls_prob.view(
        batch_size, rel_cls_prob.size(0) // batch_size, -1)

    if self.ext_feat:
        rel_pairs = roi_pair_proposals
        return base_feat, rois.data, rel_pairs, bbox_pred.data, x_obj.data, x_att.data, x_rel.data, \
            obj_cls_prob.data, att_cls_prob.data, rel_cls_prob.data, \
            obj_cls_score.data, att_cls_score.data, rel_cls_score.data

    if cfg.HAS_ATTRIBUTES and cfg.HAS_RELATIONS:
        if self.training:
            return rois, bbox_pred, obj_cls_prob, att_cls_prob, rel_cls_prob, rpn_loss, rcnn_loss
        else:
            rel_pairs = roi_pair_proposals
            return rois, rel_pairs, bbox_pred, obj_cls_prob, att_cls_prob, rel_cls_prob, rpn_loss, rcnn_loss
    elif cfg.HAS_ATTRIBUTES:
        return rois, bbox_pred, obj_cls_prob, att_cls_prob, rpn_loss, rcnn_loss
    else:
        return rois, bbox_pred, obj_cls_prob, rpn_loss, rcnn_loss
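# --- Worked example: building the subject->relation map with scatter_ ---
# A small sketch of how map_sobj_rel above is filled: column r receives
# attend_score[0, r] at row ind_subject[r], connecting each relation to its
# subject object node. The shapes and values here are toy examples.
import torch

n_obj, n_rel = 4, 3
ind_subject = torch.tensor([0, 2, 2])           # subject node index of each relation
attend_score = torch.tensor([[0.9, 0.5, 0.7]])  # 1 x n_rel attention weights

map_sobj_rel = torch.zeros(n_obj, n_rel)
map_sobj_rel.scatter_(0, ind_subject.view(1, n_rel), attend_score)
print(map_sobj_rel)
# tensor([[0.9000, 0.0000, 0.0000],
#         [0.0000, 0.0000, 0.0000],
#         [0.0000, 0.5000, 0.7000],
#         [0.0000, 0.0000, 0.0000]])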
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # update 20191026: get the index of nodes in the graph for rois (default: batch_size = 1)
    # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
    # roi_part_match[0], roi_part_match_overlap[0] and so on
    if True:
        iou_threshold = 0.8
        dis_threshold = 0.2

        # first, calculate the overlaps among rois; set edge weights between nodes with iou > threshold to 1
        overlaps = bbox_overlaps_batch(rois, rois)
        overlaps_bin = overlaps.cpu().data.numpy().copy()
        _, N_node, _ = overlaps.shape
        overlaps_bin1 = torch.unsqueeze(torch.eye(N_node, N_node).cuda(), dim=0)
        overlaps_bin1[overlaps >= iou_threshold] = 1
        overlaps_bin1[overlaps < iou_threshold] = 0
        for j in range(N_node):
            for k in range(N_node):
                if overlaps_bin[0][j][k] >= iou_threshold:
                    overlaps_bin[0][j][k] = 1
                else:
                    overlaps_bin[0][j][k] = 0
                if k == j:
                    overlaps_bin[0][j][k] = 0

        # second, calculate the distances among rois; set edge weights between nodes
        # with iou = 0 and distance < threshold to 1
        distances = bbox_distances_batch(rois, rois)
        distances_bin = distances.cpu().data.numpy().copy()
        for j in range(N_node):
            for k in range(N_node):
                if distances_bin[0][j][k] <= dis_threshold:
                    distances_bin[0][j][k] = 1
                else:
                    distances_bin[0][j][k] = 0
                if k == j:
                    distances_bin[0][j][k] = 0

        # adj_matrix_bin = overlaps_bin + distances_bin

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # pairwise cosine similarity between roi features
    dot_product_mat = torch.mm(pooled_feat, torch.transpose(pooled_feat, 0, 1))
    len_vec = torch.unsqueeze(
        torch.sqrt(torch.sum(pooled_feat * pooled_feat, dim=1)), dim=0)
    len_mat = torch.mm(torch.transpose(len_vec, 0, 1), len_vec)
    pooled_feat_sim_mat = dot_product_mat / len_mat

    cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

    # update 20191027: build the graph for rois based on index (default: batch_size = 1)
    part_size = 10
    relation_size = 5
    if True:
        cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
        # calculate adj_matrix based on adj_matrix_bin; the edge weights are the
        # cosine similarities between node features
        adj_matrix = np.zeros((N_node, N_node))
        for s in range(N_node):
            row_idx = [t for t in range(N_node)]
            random.shuffle(row_idx)
            part_cnt = 0
            relation_cnt = 0
            for t in row_idx:
                if part_cnt <= part_size:
                    if overlaps_bin[0, s, t] == 1:
                        node_feat_s = pooled_feat[s, :]
                        node_feat_t = pooled_feat[t, :]
                        adj_matrix[s, t] = cos(node_feat_s, node_feat_t).item()
                        part_cnt = part_cnt + 1
                        continue
                # if relation_cnt <= relation_size:
                #     if distances_bin[0, s, t] == 1:
                #         node_feat_s = pooled_feat[s, :]
                #         node_feat_t = pooled_feat[t, :]
                #         adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                #         relation_cnt = relation_cnt + 1
                #         continue
                # if part_cnt > part_size and relation_cnt > relation_size:
                #     break
                if part_cnt > part_size:
                    break

        adj_matrix = torch.from_numpy(adj_matrix).float().cuda()
        pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
        pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                         rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    # update 2019-6-17: fix the bug for dimension specified as 0...
    if self.training:
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
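# --- Sketch: a minimal graph-convolution layer ---
# The gcn1/gcn2 modules are not shown in this snippet. A common minimal form,
# consistent with the call signature gcn(features, adj_matrix) used above, is a
# single layer computing adj @ X @ W (Kipf & Welling style, here without degree
# normalisation). This is an assumed sketch, not the repository's implementation.
import torch
import torch.nn as nn

class SimpleGraphConv(nn.Module):
    def __init__(self, in_dim, out_dim):
        super(SimpleGraphConv, self).__init__()
        self.weight = nn.Linear(in_dim, out_dim, bias=False)

    def forward(self, x, adj):
        # x: (N_node, in_dim); adj: (N_node, N_node) weighted adjacency
        # add self-loops so every node keeps its own feature
        adj = adj + torch.eye(adj.size(0), device=adj.device)
        return adj @ self.weight(x)

gcn = SimpleGraphConv(2048, 2048)
x = torch.randn(5, 2048)
adj = torch.rand(5, 5)
print(gcn(x, adj).shape)  # torch.Size([5, 2048])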
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    if self.K > 1 and self.training:
        # check that we have the same GT count for all images of the stack,
        # comparing each image against the first
        _cmp = torch.nonzero((gt_boxes[0] != 0).sum(1)).numel()
        for k in range(1, self.K):
            assert torch.nonzero((gt_boxes[k] != 0).sum(1)).numel() == _cmp
            assert num_boxes[0] == num_boxes[k]

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)  # size [nBatch, nChan, H, W], usually nChan = 1024

    # feed base feature map to RPN to obtain rois, rois size: [nBatch, numTopProps, 1+4]
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        # rois size: [nBatch, numTopTrain], rois_target and weights sizes: [nBatch, numTopTrain, 4]
        # usually numTopTrain = 200
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to the top model, e.g. in resnet this is the 4th block
    # followed by average pooling on the spatial dimension
    # sizes: [nBatch x nRoi, nChan, poolSize, poolSize] --> [nBatch x nRoi, nOutChan]
    # usually poolSize = 7 and nOutChan = 2048
    pooled_feat = self._head_to_tail(pooled_feat)

    if self.K > 1:
        # stack channels from all images, making nBatch = 1
        nrois = rois.size(1)
        _, nc = pooled_feat.shape
        pooled_feat = pooled_feat.view(self.K, nrois, nc)  # [nBatch, nRoi, nOutChan]
        pooled_feat.transpose_(0, 1)
        pooled_feat = pooled_feat.contiguous().view(nrois, -1)  # [nRoi, nBatch x nOutChan]

    # compute bbox offset
    # if not class agnostic: bbox_pred size [nBatch x nRoi, 4 x nClasses], nClasses includes bkg
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.K > 1:
        # reshape in the expected order
        bbox_pred = bbox_pred.view(nrois, self.K, self.n_classes * 4)
        bbox_pred = bbox_pred.transpose(0, 1)  # not in-place, to prevent an autograd error
        bbox_pred = bbox_pred.contiguous().view(nrois * self.K, -1)

    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability of size [nBatch x nRoi, nClasses]
    cls_score = self.RCNN_cls_score(pooled_feat)
    if self.K > 1:
        # we predict only one score for the whole stack, so replicate it
        cls_score.unsqueeze_(0)
        cls_score = cls_score.repeat(self.K, 1, 1)
        cls_score = cls_score.view(-1, self.n_classes)
    cls_prob = F.softmax(cls_score, dim=1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                         rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)    # [nBatch, nRoi, nClasses]
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)  # [nBatch, nRoi, 4 or 4 x nClasses]

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
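# --- Worked example: stacking RoI features across the K-image stack ---
# A toy check of the reshaping logic above: features for K images of the same
# scene are interleaved so that row r of the result holds RoI r's features from
# all K images concatenated, i.e. [nBatch x nRoi, C] -> [nRoi, K x C]. The toy
# sizes below are illustrative.
import torch

K, nrois, nc = 2, 3, 4
pooled_feat = torch.arange(K * nrois * nc, dtype=torch.float32).view(K * nrois, nc)

stacked = pooled_feat.view(K, nrois, nc).transpose(0, 1).contiguous().view(nrois, -1)
print(stacked.shape)  # torch.Size([3, 8])
# row 0 = RoI 0 of image 0 followed by RoI 0 of image 1:
print(stacked[0])     # tensor([ 0.,  1.,  2.,  3., 12., 13., 14., 15.])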
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)
    # print(im_data.shape)

    # feed base feature map to RPN to obtain rois
    # TODO: modified, it now also returns Ps and Rs (rpn_cls_score and rpn_bbox_pred)
    rois, rpn_loss_cls, rpn_loss_bbox, rpn_cls_score, rpn_bbox_pred, fg_bg_label, \
        rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, then use ground truth bboxes for refining
    if self.training or self.teaching:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        # rpn_loss_bbox = 0

    '''
    roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
    rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
    rois_label = Variable(rois_label.view(-1).long())
    rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
    rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
    rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    '''

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if (self.training and not self.class_agnostic) or self.teaching:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    # TODO: here too, the losses (L_hard for cls and L_s for reg) are already computed
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                         rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    if not (self.training or self.teaching):
        # reshape skipped during training/teaching, for the external loss computation
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    # rpn_bbox_inside_weights (1, 36, 37, 56), same for outside weights
    RPN_mask = rpn_bbox_inside_weights, rpn_bbox_outside_weights

    # rpn_bbox_targets (1, 36, 37, 56): 4 coordinates * 9 anchors for each element of the feature map
    # rpn_bbox_pred (1, 36, 37, 56)
    # rpn_loss_bbox (scalar)
    RPN_reg = rpn_bbox_targets, rpn_bbox_pred, rpn_loss_bbox

    # rpn_cls_score (256, 2): logits output by the RPN's convolutional layer, before softmax.
    # The probabilities are computed with softmax in loss.py
    # fg_bg_label (256 entries of 0/1): ground truth background/foreground labels
    # rpn_loss_cls (scalar)
    RPN_cls = rpn_cls_score, fg_bg_label, rpn_loss_cls

    # rois_inside_ws (256, 4), same for outside weights
    RCN_mask = rois_inside_ws, rois_outside_ws

    # rois (1, 256, 5): regions of interest generated by the proposal layer (256)
    # rois_label (256)
    # bbox_pred (256, 4)
    # rois_target (256, 4)
    # RCNN_loss_bbox (scalar)
    RCN_reg = rois, rois_label, rois_target, bbox_pred, RCNN_loss_bbox

    # cls_score (256, 21)
    # cls_prob (1, 256, 21)
    # RCNN_loss_cls (scalar)
    RCN_cls = cls_score, cls_prob, RCNN_loss_cls

    ### Losses:
    # RPN classification loss: rpn_loss_cls
    # RPN regression loss: rpn_loss_bbox
    # RCN classification loss: RCNN_loss_cls
    # RCN regression loss: RCNN_loss_bbox
    return RPN_mask, RPN_reg, RPN_cls, RCN_mask, RCN_reg, RCN_cls
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())

        # TODO
        # map each sub-class label to its main-class label
        rois_main_label = Variable(rois_label.view(-1).long())
        rois_sub_class = list(map(lambda x: self.sub_classes[x], rois_main_label))
        rois_main_class = list(map(lambda x: sub2main_dict[x], rois_sub_class))
        rois_main_label = list(map(lambda x: self.main_classes.index(x), rois_main_class))
        rois_main_label = torch.cuda.LongTensor(rois_main_label)
        rois_main_label = Variable(rois_main_label)

        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_main_label = None
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0
        # return roi_data

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pspool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # main RCNN branch
    # feed pooled features to top model
    pooled_feat_main = self._head_to_tail_main(pooled_feat)

    nongt_dim = 300 if self.training else cfg.TEST.RPN_POST_NMS_TOP_N
    position_matrix = self.extract_position_matrix(
        rois.view(-1, 5)[:, :4].clone(), nongt_dim=nongt_dim)
    position_embedding = self.extract_position_embedding(
        position_matrix, feat_dim=64)

    pooled_feat_main = self.fc1(pooled_feat_main)
    attention_feat_1 = self.attention_1(pooled_feat_main, position_embedding)
    pooled_feat_main = pooled_feat_main + attention_feat_1
    pooled_feat_main = self.fc2(pooled_feat_main)
    # attend over the fc2 output (was pooled_feat, a copy-paste slip)
    attention_feat_2 = self.attention_2(pooled_feat_main, position_embedding)
    pooled_feat_main = pooled_feat_main + attention_feat_2

    # compute bbox offset
    bbox_pred_main = self.RCNN_bbox_pred_main(pooled_feat_main)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view_main = bbox_pred_main.view(
            bbox_pred_main.size(0), int(bbox_pred_main.size(1) / 4), 4)
        bbox_pred_select_main = torch.gather(
            bbox_pred_view_main, 1,
            rois_main_label.view(rois_main_label.size(0), 1, 1).expand(
                rois_main_label.size(0), 1, 4))
        bbox_pred_main = bbox_pred_select_main.squeeze(1)

    # compute object classification probability
    cls_score_main = self.RCNN_cls_score_main(pooled_feat_main)
    cls_prob_main = F.softmax(cls_score_main, 1)

    # sub RCNN branch
    pooled_feat_sub = self._head_to_tail_sub(pooled_feat)
    # nongt_dim = 300 if self.training else cfg.TEST.RPN_POST_NMS_TOP_N
    # position_matrix = self.extract_position_matrix(
    #     rois.view(-1, 5)[:, :4].clone(), nongt_dim=nongt_dim)
    # position_embedding = self.extract_position_embedding(
    #     position_matrix, feat_dim=64)
    pooled_feat_sub = self.fc1(pooled_feat_sub)
    attention_feat_1_sub = self.attention_1(pooled_feat_sub, position_embedding)
    pooled_feat_sub = pooled_feat_sub + attention_feat_1_sub
    pooled_feat_sub = self.fc2(pooled_feat_sub)
    attention_feat_2_sub = self.attention_2(pooled_feat_sub, position_embedding)
    pooled_feat_sub = pooled_feat_sub + attention_feat_2_sub

    bbox_pred_sub = self.RCNN_bbox_pred_sub(pooled_feat_sub)
    if self.training and not self.class_agnostic:
        bbox_pred_view_sub = bbox_pred_sub.view(
            bbox_pred_sub.size(0), int(bbox_pred_sub.size(1) / 4), 4)
        bbox_pred_select_sub = torch.gather(
            bbox_pred_view_sub, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred_sub = bbox_pred_select_sub.squeeze(1)

    cls_score_sub = self.RCNN_cls_score_sub(pooled_feat_sub)
    # pdb.set_trace()

    # propagate main-class weights to the sub-class scores
    if 'score' in self.casecade_type:
        main_cls_weight = torch.cuda.FloatTensor(cls_score_main.size()[0],
                                                 len(self.sub_classes))
        for key, val in self.main2sub_idx_dict.items():
            for column_idx in val:
                main_cls_weight[:, column_idx] = cls_score_main[:, key]
        if self.casecade_type == 'add_score':
            cls_score_sub += main_cls_weight
        elif self.casecade_type == 'mul_score':
            cls_score_sub *= main_cls_weight

    cls_prob_sub = F.softmax(cls_score_sub, 1)

    # propagate main-class weights to the sub-class probabilities
    if 'prob' in self.casecade_type:
        main_cls_weight = torch.cuda.FloatTensor(cls_prob_main.size()[0],
                                                 len(self.sub_classes))
        for key, val in self.main2sub_idx_dict.items():
            for column_idx in val:
                main_cls_weight[:, column_idx] = cls_prob_main[:, key]
        if self.casecade_type == 'add_prob':
            # TODO normalized
            cls_prob_sub = cls_prob_sub * self.alpha + (1 - self.alpha) * main_cls_weight

    RCNN_loss_cls_main = 0
    RCNN_loss_bbox_main = 0
    RCNN_loss_cls_sub = 0
    RCNN_loss_bbox_sub = 0

    if self.training:
        # classification loss
        RCNN_loss_cls_main = F.cross_entropy(cls_score_main, rois_main_label)
        # TODO roi_label should
        RCNN_loss_cls_sub = F.cross_entropy(cls_score_sub, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox_main = _smooth_l1_loss(
            bbox_pred_main, rois_target, rois_inside_ws, rois_outside_ws)
        # note: use the sub branch's prediction here (was bbox_pred_main, a copy-paste slip)
        RCNN_loss_bbox_sub = _smooth_l1_loss(
            bbox_pred_sub, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob_main = cls_prob_main.view(batch_size, rois.size(1), -1)
    bbox_pred_main = bbox_pred_main.view(batch_size, rois.size(1), -1)
    cls_prob_sub = cls_prob_sub.view(batch_size, rois.size(1), -1)
    bbox_pred_sub = bbox_pred_sub.view(batch_size, rois.size(1), -1)

    if self.training:
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls_main = torch.unsqueeze(RCNN_loss_cls_main, 0)
        RCNN_loss_bbox_main = torch.unsqueeze(RCNN_loss_bbox_main, 0)
        RCNN_loss_cls_sub = torch.unsqueeze(RCNN_loss_cls_sub, 0)
        RCNN_loss_bbox_sub = torch.unsqueeze(RCNN_loss_bbox_sub, 0)

    return rois, cls_prob_main, bbox_pred_main, cls_prob_sub, bbox_pred_sub, \
        rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls_sub, RCNN_loss_bbox_sub, \
        RCNN_loss_cls_main, RCNN_loss_bbox_main, rois_label
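# --- Worked example: cascading main-class scores onto sub-class columns ---
# A toy illustration of the fusion above: each sub-class column receives the score
# of its main class, then the two are combined (the 'add_score' variant is shown).
# main2sub_idx_dict maps a main-class column index to its sub-class column indices;
# the dictionary and sizes here are invented for the example.
import torch

n_roi, n_sub = 2, 4
main2sub_idx_dict = {0: [0, 1], 1: [2, 3]}  # main 0 -> subs {0, 1}, main 1 -> subs {2, 3}

cls_score_main = torch.tensor([[2.0, -1.0],
                               [0.5,  3.0]])
cls_score_sub = torch.zeros(n_roi, n_sub)

main_cls_weight = torch.zeros(n_roi, n_sub)
for key, val in main2sub_idx_dict.items():
    for column_idx in val:
        main_cls_weight[:, column_idx] = cls_score_main[:, key]

cls_score_sub += main_cls_weight  # 'add_score'
print(cls_score_sub)
# tensor([[ 2.0000,  2.0000, -1.0000, -1.0000],
#         [ 0.5000,  0.5000,  3.0000,  3.0000]])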
def forward(self, im_data, im_info, gt_boxes, num_boxes, pooling_size):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)
    # pdb.set_trace()

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        base_feat, im_info, gt_boxes, num_boxes)

    if not self.training:
        # pdb.set_trace()
        # rois = tweak_rois(rois)
        rois = select_rois(rois, base_feat)

        # collect intermediate feature maps from the base network
        features = []
        prev_feat = im_data
        for i, module in enumerate(self.RCNN_base._modules.values()):
            next_feat = module(prev_feat)
            features.append(next_feat)
            prev_feat = next_feat
        features = [features[i] for i in self.interested_modules]

        popout_rois = np.ndarray((1, 4), dtype="float32")
        for iF in features:
            base_feat = iF
            feature_width = base_feat.size()[2]
            # rebuild the roi pooling layers to match this feature map's stride
            self.RCNN_roi_pool = _RoIPooling(
                pooling_size, pooling_size, 1.0 / (im_info[0][0] / feature_width))
            self.RCNN_roi_align = RoIAlignAvg(
                pooling_size, pooling_size, 1.0 / (im_info[0][0] / feature_width))

            # do roi pooling based on predicted rois
            if cfg.POOLING_MODE == 'crop':
                # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
                grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:],
                                           self.grid_size)
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
                pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
            elif cfg.POOLING_MODE == 'align':
                pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
            elif cfg.POOLING_MODE == 'pool':
                pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

            pooled_feat = pooled_feat.view(pooled_feat.shape[0], -1)
            popout_index = find_the_popout(pooled_feat)
            popout_rois = np.vstack((popout_rois, rois[0, popout_index.item(), 1:5]))

        popout_rois = popout_rois[1:, :]
        rois = rois[0, :, 1:].cpu().numpy()
        return rois, popout_rois
    else:
        # if it is the training phase, then use ground truth bboxes for refining
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:],
                                       self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        # added by Cindy based on https://github.com/jwyang/faster-rcnn.pytorch/issues/226
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
def forward(self, im_data, rois, im_info, labels=None, num_boxes=None):
    batch_size = im_data.size(0)
    num_rois = rois.size(0)
    # init_rois = rois.data

    if self.training:
        # for multi-GPU
        try:
            nb = int(num_boxes[:, 0].item())
        except:
            nb = int(num_boxes.item())
        num_boxes = num_boxes.data
        # ret_prob = rois.new().new_zeros(1, rois.size(1), 21)
        rois = rois[:, :nb]
        axis1 = int(num_boxes[:, 1].item())
        axis2 = int(num_boxes[:, 2].item())
        im_data = im_data[:, :, :axis1, :axis2]
        # im_data_for_aug = im_data.clone()
        num_boxes = nb
    else:
        num_boxes = num_rois

    # feed image data to base model to obtain base feature map
    base_feat = self.OICR_base(im_data)
    rois = Variable(rois)

    # do roi pooling based on predicted rois
    # cfg.POOLING_MODE = 'pool'
    cfg.POOLING_MODE = 'pool'  # force roi pooling mode
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.OICR_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.OICR_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.OICR_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    fc7 = self._head_to_tail(pooled_feat)  # fc7

    # instance classifier refinement streams
    ic_score = self.ic_score(fc7).view(batch_size, num_boxes, self.n_classes + 1)
    ic_score1 = self.ic_score1(fc7).view(batch_size, num_boxes, self.n_classes + 1)
    ic_score2 = self.ic_score2(fc7).view(batch_size, num_boxes, self.n_classes + 1)
    self.ic_prob = F.softmax(ic_score, dim=2)
    self.ic_prob1 = F.softmax(ic_score1, dim=2)
    self.ic_prob2 = F.softmax(ic_score2, dim=2)
    # loss_midn = loss_oicr = loss_oicr1 = loss_oicr2 = 0

    # multiple-instance detection network (MIDN) streams
    self.midn_prob0 = self.midn_score0(fc7).view(batch_size, num_boxes, self.n_classes)
    self.midn_prob1 = self.midn_score1(fc7).view(batch_size, num_boxes, self.n_classes)
    self.midn_prob0 = F.softmax(self.midn_prob0, dim=1)  # over rois
    self.midn_prob1 = F.softmax(self.midn_prob1, dim=2)  # over classes
    self.midn_prob = self.midn_prob0 * self.midn_prob1

    if self.training:
        labels = labels.data
        if torch.isnan(fc7).sum() > 0 or torch.isnan(self.midn_score0.weight.data).sum() > 0:
            pdb.set_trace()

        ### RPN ###
        # use rois_augment to choose pseudo gt
        gt_RPN, pos_samples = choose_gt(rois, self.midn_prob.clone(), labels)
        pgt_num_boxes = gt_RPN.shape[0]
        # use pseudo gt to generate rois_RPN
        rois_RPN, rpn_loss_cls, rpn_loss_bbox = self.OICR_rpn(
            base_feat, im_info, gt_RPN, pgt_num_boxes)
        # use rois_augment and rois_RPN together
        rois_together = torch.cat((pos_samples, rois_RPN), 1)
        init_rois = rois_together.data
        ret_prob = rois_together.new().new_zeros(1, rois_together.size(1), 21)
        num_boxes_tog = rois_together.shape[1]

        pooled_feat = self.OICR_roi_pool(base_feat, rois_together.view(-1, 5))
        fc7 = self._head_to_tail(pooled_feat)

        ic_score = self.ic_score(fc7).view(batch_size, num_boxes_tog, self.n_classes + 1)
        ic_score1 = self.ic_score1(fc7).view(batch_size, num_boxes_tog, self.n_classes + 1)
        ic_score2 = self.ic_score2(fc7).view(batch_size, num_boxes_tog, self.n_classes + 1)
        self.ic_prob = F.softmax(ic_score, dim=2)
        self.ic_prob1 = F.softmax(ic_score1, dim=2)
        self.ic_prob2 = F.softmax(ic_score2, dim=2)

        self.midn_prob0 = self.midn_score0(fc7).view(batch_size, num_boxes_tog, self.n_classes)
        self.midn_prob1 = self.midn_score1(fc7).view(batch_size, num_boxes_tog, self.n_classes)
        self.midn_prob0 = F.softmax(self.midn_prob0, dim=1)  # over rois
        self.midn_prob1 = F.softmax(self.midn_prob1, dim=2)  # over classes
        self.midn_prob = self.midn_prob0 * self.midn_prob1

        # image-level classification loss over proposal-summed scores
        self.global_pool = self.midn_prob.sum(dim=1, keepdim=True)
        self.global_pool = self.global_pool.view(batch_size, self.n_classes)
        loss_midn = multi_class_cross_entropy_loss(self.global_pool, labels)
        ### end ###

        label_ic, cls_loss_weights = OICRLayer(rois_together, self.midn_prob.clone(), labels)
        label_ic1, cls_loss_weights1 = OICRLayer(rois_together, self.ic_prob.clone(), labels)
        label_ic2, cls_loss_weights2 = OICRLayer(rois_together, self.ic_prob1.clone(), labels)

        if torch.isnan(self.ic_prob).sum().data > 0 or torch.isnan(self.ic_prob1).sum().data > 0 \
                or torch.isnan(self.ic_prob2).sum().data > 0:
            pdb.set_trace()

        label_ic = torch.FloatTensor(label_ic).cuda().detach()
        label_ic1 = torch.FloatTensor(label_ic1).cuda().detach()
        label_ic2 = torch.FloatTensor(label_ic2).cuda().detach()
        cls_loss_weights = torch.tensor(cls_loss_weights).cuda().detach()
        cls_loss_weights1 = torch.tensor(cls_loss_weights1).cuda().detach()
        cls_loss_weights2 = torch.tensor(cls_loss_weights2).cuda().detach()

        loss_oicr = WeightedSoftmaxWithLoss(self.ic_prob, label_ic, cls_loss_weights)
        loss_oicr1 = WeightedSoftmaxWithLoss(self.ic_prob1, label_ic1, cls_loss_weights1)
        loss_oicr2 = WeightedSoftmaxWithLoss(self.ic_prob2, label_ic2, cls_loss_weights2)
        # oicr_loss = loss_oicr + loss_oicr1 + loss_oicr2

        ret_prob[:, :num_boxes_tog] = (self.ic_prob + self.ic_prob1 + self.ic_prob2) / 3

        return init_rois, loss_midn.view(1), loss_oicr.view(1), loss_oicr1.view(1), \
            loss_oicr2.view(1), ret_prob, rpn_loss_cls, rpn_loss_bbox
    else:
        return self.ic_prob, self.ic_prob1, self.ic_prob2
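# --- Sketch: an image-level multi-class cross-entropy loss ---
# multi_class_cross_entropy_loss is not shown in this snippet. In OICR-style
# weakly supervised detectors the image-level loss is typically a binary
# cross-entropy over per-class scores summed across proposals; a minimal sketch
# under that assumption:
import torch

def multi_class_cross_entropy_loss(global_pool, labels, eps=1e-6):
    # global_pool: (batch, n_classes) proposal-summed scores in [0, 1]
    # labels:      (batch, n_classes) binary image-level labels
    p = global_pool.clamp(eps, 1.0 - eps)
    loss = -(labels * torch.log(p) + (1.0 - labels) * torch.log(1.0 - p))
    return loss.sum(dim=1).mean()

scores = torch.tensor([[0.9, 0.1, 0.3]])
labels = torch.tensor([[1.0, 0.0, 0.0]])
print(multi_class_cross_entropy_loss(scores, labels))  # small positive scalar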
def forward(self, im_data, im_info, gt_boxes, num_boxes, tgt_im_data,
            tgt_im_info, tgt_gt_boxes, tgt_num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data
    tgt_im_info = tgt_im_info.data
    tgt_gt_boxes = tgt_gt_boxes.data
    tgt_num_boxes = tgt_num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)
    tgt_base_feat = self.RCNN_base(tgt_im_data)

    # if it is the training phase, then use ground truth bboxes for refining
    if self.training:
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox, rpn_cls_prob, rois_select = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)
        tgt_rois, tgt_rpn_loss_cls, tgt_rpn_loss_bbox, tgt_rpn_cls_prob, tgt_rois_select = self.RCNN_rpn(
            tgt_base_feat, tgt_im_info, tgt_gt_boxes, tgt_num_boxes)

        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))

        tgt_roi_data = self.RCNN_proposal_target(tgt_rois, tgt_gt_boxes, tgt_num_boxes)
        tgt_rois, tgt_rois_label, tgt_rois_target, tgt_rois_inside_ws, tgt_rois_outside_ws = tgt_roi_data

        tgt_rois_label = Variable(tgt_rois_label.view(-1).long())
        tgt_rois_target = Variable(tgt_rois_target.view(-1, tgt_rois_target.size(2)))
        tgt_rois_inside_ws = Variable(tgt_rois_inside_ws.view(-1, tgt_rois_inside_ws.size(2)))
        tgt_rois_outside_ws = Variable(tgt_rois_outside_ws.view(-1, tgt_rois_outside_ws.size(2)))
    else:
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)
        tgt_rois, tgt_rpn_loss_cls, tgt_rpn_loss_bbox = self.RCNN_rpn(
            tgt_base_feat, tgt_im_info, tgt_gt_boxes, tgt_num_boxes)

        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

        tgt_rois_label = None
        tgt_rois_target = None
        tgt_rois_inside_ws = None
        tgt_rois_outside_ws = None
        tgt_rpn_loss_cls = 0
        tgt_rpn_loss_bbox = 0

    rois = Variable(rois)
    tgt_rois = Variable(tgt_rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # for RCNN
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)

        tgt_grid_xy = _affine_grid_gen(tgt_rois.view(-1, 5), tgt_base_feat.size()[2:], self.grid_size)
        tgt_grid_yx = torch.stack([tgt_grid_xy.data[:, :, :, 1], tgt_grid_xy.data[:, :, :, 0]], 3).contiguous()
        tgt_pooled_feat = self.RCNN_roi_crop(tgt_base_feat, Variable(tgt_grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            tgt_pooled_feat = F.max_pool2d(tgt_pooled_feat, 2, 2)

        # for RPN adaptive loss
        if self.training:
            grid_xy_ = _affine_grid_gen(rois_select, base_feat.size()[2:], self.grid_size)
            grid_yx_ = torch.stack([grid_xy_.data[:, :, :, 1], grid_xy_.data[:, :, :, 0]], 3).contiguous()
            pooled_feat_ = self.RCNN_roi_crop(base_feat, Variable(grid_yx_).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_ = F.max_pool2d(pooled_feat_, 2, 2)

            tgt_grid_xy_ = _affine_grid_gen(tgt_rois_select, tgt_base_feat.size()[2:], self.grid_size)
            tgt_grid_yx_ = torch.stack([tgt_grid_xy_.data[:, :, :, 1], tgt_grid_xy_.data[:, :, :, 0]], 3).contiguous()
            tgt_pooled_feat_ = self.RCNN_roi_crop(tgt_base_feat, Variable(tgt_grid_yx_).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                tgt_pooled_feat_ = F.max_pool2d(tgt_pooled_feat_, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        # for RCNN
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        tgt_pooled_feat = self.RCNN_roi_align(tgt_base_feat, tgt_rois.view(-1, 5))
        # for RPN adaptive loss
        if self.training:
            pooled_feat_ = self.RCNN_roi_align(base_feat, rois_select)
            tgt_pooled_feat_ = self.RCNN_roi_align(tgt_base_feat, tgt_rois_select)
    elif cfg.POOLING_MODE == 'pool':
        # for RCNN
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
        tgt_pooled_feat = self.RCNN_roi_pool(tgt_base_feat, tgt_rois.view(-1, 5))
        # for RPN adaptive loss
        if self.training:
            pooled_feat_ = self.RCNN_roi_pool(base_feat, rois_select)
            tgt_pooled_feat_ = self.RCNN_roi_pool(tgt_base_feat, tgt_rois_select)

    # get the adaptive feature for RPN
    if self.training:
        # note: flatten by the selected RPN rois' own count; the flattened source
        # used pooled_feat.size(0) here, which only works when both counts match
        rpn_adapt_feat = self.rpn_adapt_feat(pooled_feat_.view(pooled_feat_.size(0), -1))
        tgt_rpn_adapt_feat = self.rpn_adapt_feat(tgt_pooled_feat_.view(tgt_pooled_feat_.size(0), -1))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    tgt_pooled_feat = self._head_to_tail(tgt_pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    tgt_bbox_pred = self.RCNN_bbox_pred(tgt_pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

        tgt_bbox_pred_view = tgt_bbox_pred.view(tgt_bbox_pred.size(0), int(tgt_bbox_pred.size(1) / 4), 4)
        tgt_bbox_pred_select = torch.gather(
            tgt_bbox_pred_view, 1,
            tgt_rois_label.view(tgt_rois_label.size(0), 1, 1).expand(tgt_rois_label.size(0), 1, 4))
        tgt_bbox_pred = tgt_bbox_pred_select.squeeze(1)

    # compute object classification probability
    adapt_feat = self.RCNN_adapt_feat(pooled_feat)
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)
    tgt_adapt_feat = self.RCNN_adapt_feat(tgt_pooled_feat)
    tgt_cls_score = self.RCNN_cls_score(tgt_pooled_feat)
    tgt_cls_prob = F.softmax(tgt_cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    tgt_RCNN_loss_cls = 0
    tgt_RCNN_loss_bbox = 0
    RCNN_loss_intra = 0
    RCNN_loss_inter = 0
    RPN_loss_intra = 0
    RPN_loss_inter = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        tgt_RCNN_loss_cls = F.cross_entropy(tgt_cls_score, tgt_rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
        tgt_RCNN_loss_bbox = _smooth_l1_loss(tgt_bbox_pred, tgt_rois_target,
                                             tgt_rois_inside_ws, tgt_rois_outside_ws)

        # intra-class and inter-class adaptation loss:
        # pull the same classes together and push different classes apart
        # across the source and target domains
        if self.mode == 'adapt':
            RCNN_loss_intra, RCNN_loss_inter = self.adaptive_loss(
                adapt_feat, cls_prob, tgt_adapt_feat, tgt_cls_prob, batch_size)
        # use gcn to cluster the representation of every class
        elif self.mode == 'gcn_adapt':
            RCNN_loss_intra, RCNN_loss_inter = self.gcn_adaptive_loss(
                adapt_feat, cls_prob, rois, tgt_adapt_feat, tgt_cls_prob, tgt_rois, batch_size)

        # intra-class and inter-class losses for RPN:
        # pull the same classes together and push different classes apart
        # across the source and target domains
        if self.rpn_mode == 'adapt':
            RPN_loss_intra, RPN_loss_inter = self.adaptive_loss_rpn(
                rpn_adapt_feat, rpn_cls_prob, tgt_rpn_adapt_feat, tgt_rpn_cls_prob, batch_size)
        # use gcn to cluster the representation of every class
        elif self.rpn_mode == 'gcn_adapt':
            RPN_loss_intra, RPN_loss_inter = self.gcn_adaptive_loss(
                rpn_adapt_feat, rpn_cls_prob, rois, tgt_rpn_adapt_feat,
                tgt_rpn_cls_prob, tgt_rois, batch_size)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
    tgt_cls_prob = tgt_cls_prob.view(batch_size, tgt_rois.size(1), -1)
    tgt_bbox_pred = tgt_bbox_pred.view(batch_size, tgt_rois.size(1), -1)

    if self.training:
        return rois, tgt_rois, cls_prob, tgt_cls_prob, bbox_pred, tgt_bbox_pred, rpn_loss_cls.view(-1), tgt_rpn_loss_cls.view(-1), \
            rpn_loss_bbox.view(-1), tgt_rpn_loss_bbox.view(-1), RCNN_loss_cls.view(-1), tgt_RCNN_loss_cls.view(-1), RCNN_loss_bbox.view(-1), \
            tgt_RCNN_loss_bbox.view(-1), RCNN_loss_intra.view(-1), RCNN_loss_inter.view(-1), rois_label, tgt_rois_label, \
            RPN_loss_intra.view(-1), RPN_loss_inter.view(-1)
    else:
        return rois, tgt_rois, cls_prob, tgt_cls_prob, bbox_pred, tgt_bbox_pred, rpn_loss_cls, tgt_rpn_loss_cls, rpn_loss_bbox, \
            tgt_rpn_loss_bbox, RCNN_loss_cls, tgt_RCNN_loss_cls, RCNN_loss_bbox, tgt_RCNN_loss_bbox, \
            RCNN_loss_intra, RCNN_loss_inter, rois_label, tgt_rois_label, RPN_loss_intra, RPN_loss_inter
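# A minimal sketch of the kind of intra/inter adaptation term `adaptive_loss` computes:
# class prototypes are built from soft predictions in each domain, same-class prototypes
# are pulled together and different-class prototypes pushed apart by a margin. The names
# and margin value are assumptions for illustration; the repo's own implementation may
# differ in how it weights and normalizes the terms.
def adaptive_loss_sketch(feat, prob, tgt_feat, tgt_prob, margin=1.0):
    # (n_classes, d) prototypes as probability-weighted feature means
    proto_s = prob.t() @ feat / (prob.sum(0).unsqueeze(1) + 1e-6)
    proto_t = tgt_prob.t() @ tgt_feat / (tgt_prob.sum(0).unsqueeze(1) + 1e-6)
    intra = ((proto_s - proto_t) ** 2).sum(1).mean()  # pull same class together
    dist = torch.cdist(proto_s, proto_t)              # (C, C) cross-domain distances
    off_diag = dist[~torch.eye(dist.size(0), dtype=torch.bool, device=dist.device)]
    inter = F.relu(margin - off_diag).mean()          # push different classes apart
    return intra, inter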
def forward(self, im_data, im_info, gt_boxes, gt_boxes_sens, num_boxes):
    batch_size = im_data[0].size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    gt_boxes_sens = gt_boxes_sens.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat_c = self.RCNN_base_c(im_data[0])
    base_feat_t = self.RCNN_base_t(im_data[1])
    base_feat_fused = 0.5 * (base_feat_c + base_feat_t)
    base_feat_fused = self.RCNN_base_fused(base_feat_fused)
    conv5_c = self.RCNN_base_f1(base_feat_c)
    conv5_t = self.RCNN_base_f2(base_feat_t)

    # feed fused base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat_fused, im_info, gt_boxes, num_boxes)

    # if it is training phase, then use ground truth bboxes for refining
    if self.training:
        # 50% jitter probability
        if np.random.rand(1)[0] > 0.5:
            jitter = (torch.randn(1, 256, 4) / 20).cuda()
        else:
            jitter = (torch.zeros(1, 256, 4)).cuda()
        # feed jitter to obtain rois_align_target
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, gt_boxes_sens, num_boxes, jitter, im_info)
        rois, rois_jittered, rois_label, rois_target, rois_align_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_align_target = Variable(rois_align_target.view(-1, rois_align_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_jittered = copy.deepcopy(rois)
        rois_label = None
        rois_target = None
        rois_align_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    # Region Feature Alignment module
    ctx_rois = bbox_contextual_batch(rois)
    clip_boxes(ctx_rois[:, :, 1:], im_info, batch_size)
    ctx_rois = Variable(ctx_rois)
    ctx_rois_jittered = bbox_contextual_batch(rois_jittered)
    clip_boxes(ctx_rois_jittered[:, :, 1:], im_info, batch_size)
    ctx_rois_jittered = Variable(ctx_rois_jittered)

    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(ctx_rois.view(-1, 5), conv5_c.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat_c = self.RCNN_roi_crop(conv5_c, Variable(grid_yx).detach())
        grid_xy = _affine_grid_gen(ctx_rois_jittered.view(-1, 5), conv5_t.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat_t = self.RCNN_roi_crop(conv5_t, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat_c = F.max_pool2d(pooled_feat_c, 2, 2)
            pooled_feat_t = F.max_pool2d(pooled_feat_t, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat_c = self.RCNN_roi_align(conv5_c, ctx_rois.view(-1, 5))
        pooled_feat_t = self.RCNN_roi_align(conv5_t, ctx_rois_jittered.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat_c = self.RCNN_roi_pool(conv5_c, ctx_rois.view(-1, 5))
        pooled_feat_t = self.RCNN_roi_pool(conv5_t, ctx_rois_jittered.view(-1, 5))

    pooled_feat_res = pooled_feat_t - pooled_feat_c
    # feed pooled features to top model
    pooled_feat_res = self._head_to_tail_align(pooled_feat_res)
    bbox_align_pred = self.RCNN_bbox_align_pred(pooled_feat_res)
    RCNN_loss_bbox_align = 0

    # Apply bounding-box regression deltas
    box_deltas = bbox_align_pred.data
    box_deltas_zeros = torch.zeros(box_deltas.shape).cuda()
    box_deltas = torch.cat((box_deltas, box_deltas_zeros), 1)
    # Optionally normalize targets by a precomputed mean and stdev
    # The roi alignment process is class_agnostic
    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
    box_deltas = box_deltas.view(batch_size, -1, 4)
    rois_sens = rois_jittered.new(rois_jittered.size()).zero_()
    rois_sens[:, :, 1:5] = bbox_transform_inv(rois_jittered[:, :, 1:5], box_deltas, batch_size)
    clip_boxes(rois_sens[:, :, 1:5], im_info, batch_size)

    rois = Variable(rois)
    rois_sens = Variable(rois_sens)

    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(rois.view(-1, 5), conv5_c.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat_c = self.RCNN_roi_crop(conv5_c, Variable(grid_yx).detach())
        grid_xy = _affine_grid_gen(rois_sens.view(-1, 5), conv5_t.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat_t = self.RCNN_roi_crop(conv5_t, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat_c = F.max_pool2d(pooled_feat_c, 2, 2)
            pooled_feat_t = F.max_pool2d(pooled_feat_t, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat_c = self.RCNN_roi_align(conv5_c, rois.view(-1, 5))
        pooled_feat_t = self.RCNN_roi_align(conv5_t, rois_sens.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat_c = self.RCNN_roi_pool(conv5_c, rois.view(-1, 5))
        pooled_feat_t = self.RCNN_roi_pool(conv5_t, rois_sens.view(-1, 5))

    cls_score_ref = self.confidence_ref(self.RCNN_top_ref(pooled_feat_c.view(pooled_feat_c.size(0), -1)))
    cls_score_sens = self.confidence_sens(self.RCNN_top_sens(pooled_feat_t.view(pooled_feat_t.size(0), -1)))
    cls_prob_ref = F.softmax(cls_score_ref, 1)
    cls_prob_sens = F.softmax(cls_score_sens, 1)
    confidence_ref = torch.abs(cls_prob_ref[:, 1] - cls_prob_ref[:, 0])
    confidence_sens = torch.abs(cls_prob_sens[:, 1] - cls_prob_sens[:, 0])
    confidence_ref = confidence_ref.unsqueeze(1).unsqueeze(2).unsqueeze(3)
    confidence_sens = confidence_sens.unsqueeze(1).unsqueeze(2).unsqueeze(3)
    pooled_feat_c = confidence_ref * pooled_feat_c
    pooled_feat_t = confidence_sens * pooled_feat_t
    pooled_feat = pooled_feat_c + pooled_feat_t

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_cls_ref = 0
    RCNN_loss_cls_sens = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        RCNN_loss_cls_ref = F.cross_entropy(cls_score_ref, rois_label)
        RCNN_loss_cls_sens = F.cross_entropy(cls_score_sens, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
        RCNN_loss_bbox_align = _smooth_l1_loss(bbox_align_pred, rois_align_target[:, :2],
                                               rois_inside_ws[:, :2], rois_outside_ws[:, :2])

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, rois_sens, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, \
        RCNN_loss_cls_ref, RCNN_loss_cls_sens, RCNN_loss_bbox, RCNN_loss_bbox_align, rois_label
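# `bbox_contextual_batch` is used above to enlarge each roi with surrounding context
# before the Region Feature Alignment pooling. A plausible sketch, assuming rois are
# (b, n, 5) as (batch_idx, x1, y1, x2, y2) and the context window scales each box by
# `ratio` around its center; the actual ratio the repo uses is not shown here.
def bbox_contextual_batch_sketch(rois, ratio=1.5):
    ctx = rois.clone()
    cx = (rois[:, :, 1] + rois[:, :, 3]) * 0.5
    cy = (rois[:, :, 2] + rois[:, :, 4]) * 0.5
    w = (rois[:, :, 3] - rois[:, :, 1] + 1) * ratio
    h = (rois[:, :, 4] - rois[:, :, 2] + 1) * ratio
    ctx[:, :, 1] = cx - 0.5 * w
    ctx[:, :, 2] = cy - 0.5 * h
    ctx[:, :, 3] = cx + 0.5 * w
    ctx[:, :, 4] = cy + 0.5 * h
    return ctx  # the caller still clips to the image via clip_boxes(...)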
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # update 20191026: get the index of nodes in graph for rois (default: batch_size = 1)
    # if we want to change batch_size, we should consider to change roi2gt_assignment[0],
    # roi_part_match[0], roi_part_match_overlap[0] and so on
    if self.training:
        # part_threshold = 0.25
        #
        # first, calculate the overlaps among rois and gt, get the max roi for each gt (node_cls)
        overlaps = bbox_overlaps_batch(rois, rois)[0]
        N_node, _ = overlaps.shape
        node_list = [i for i in range(N_node)]
        for j in range(N_node):
            for k in range(N_node):
                if overlaps[j][k] != 0:
                    overlaps[j][k] = 1
                if k == j:
                    overlaps[j][k] = 0
        idx_subgraph, vertex_subgraph = subgraph_split(overlaps)
        # max_overlaps_rois2gt, roi2gt_assignment = torch.max(overlaps, 1)
        #
        # # second, calculate the overlaps among rois and rois_select,
        # # using threshold to select roi for each rois_select (node_part)
        # rois_cls_tmp = rois[:, roi2gt_assignment[0], :]
        # rois_cls_num = np.argwhere(gt_boxes[:, :, 4].cpu().data.numpy()[0] != 0).shape[0]
        # rois_cls_tmp = rois_cls_tmp[:, :rois_cls_num, :]
        # rois_cls = rois_cls_tmp.new(rois_cls_tmp.size(0), rois_cls_tmp.size(1), 5).zero_()
        # rois_cls[:, :, :4] = rois_cls_tmp[:, :, 1:5]
        # rois_cls[:, :, 4] = rois_cls_tmp[:, :, 0]
        #
        # # rois_cls_idx_list is the idx related from rois_cls to rois
        # roi_cls_idx_list = roi2gt_assignment[0][:rois_cls_num]
        #
        # overlaps = bbox_overlaps_batch(rois, rois_cls)
        # max_overlaps_rois2cls, roi2cls_assignment = torch.max(overlaps, 2)
        #
        # roi_part_match_overlap = max_overlaps_rois2cls.cpu().data.numpy()
        # roi_part_match = roi2cls_assignment.cpu().data.numpy()
        #
        # # roi_part_idx_list is the idx related from rois_part to rois
        # roi_part_idx_list = []
        # roi_part_match_idx = np.unique(roi_part_match[0])
        # for roi_cls_idx in roi_part_match_idx:
        #     match_idx_tmp = np.transpose(np.argwhere(roi_part_match[0] == roi_cls_idx))[0]
        #     match_overlap_tmp = roi_part_match_overlap[0][match_idx_tmp]
        #     # use threshold to select rois_part
        #     match_idx_tmp_select = np.transpose(np.argwhere(match_overlap_tmp > part_threshold))[0]
        #     match_idx_tmp = match_idx_tmp[match_idx_tmp_select]
        #     roi_part_idx_list.append(torch.from_numpy(match_idx_tmp))

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # # update 20191027: build graph for rois based on index (default: batch_size = 1)
    # adj_jud = np.zeros((0))
    # adj_rois = torch.zeros(0).cuda().long()
    # for i in range(roi_cls_idx_list.shape[0]):
    #     adj_jud = np.concatenate((adj_jud, [1]))
    #     adj_rois = torch.cat((adj_rois, roi_cls_idx_list[i:i+1]))
    #     try:
    #         adj_jud = np.concatenate((adj_jud, np.zeros((roi_part_idx_list[i].shape[0]))))
    #         adj_rois = torch.cat((adj_rois, roi_part_idx_list[i].cuda()))
    #     except IndexError:
    #         print ('IndexError happen, continue')
    #         continue
    #
    # node_cls_idx = np.transpose(np.argwhere(adj_jud == 1))[0]
    #
    # adj_matrix_bin = np.zeros((len(adj_jud), len(adj_jud)))
    #
    # # link edges for node_cls to node_cls
    # for k in range(len(node_cls_idx)-1):
    #     idx_node_cls_1 = node_cls_idx[k]
    #     idx_node_cls_2 = node_cls_idx[k + 1]
    #     adj_matrix_bin[idx_node_cls_1, idx_node_cls_2] = 1
    #     adj_matrix_bin[idx_node_cls_2, idx_node_cls_1] = 1
    #
    # # link edges for node_cls to related node_part
    # for k in range(len(node_cls_idx)-1):
    #     idx_start = node_cls_idx[k]
    #     idx_end = node_cls_idx[k + 1]
    #     for s in range(idx_start, idx_end):
    #         for t in range(idx_start, idx_end):
    #             if s == t:
    #                 adj_matrix_bin[s, t] = 0
    #             else:
    #                 adj_matrix_bin[s, t] = 1
    #
    # # calculate the adj_mat based on adj_matrix_bin, the weights on edges
    # # are the cosine distance between nodes
    # adj_matrix = np.zeros((len(adj_jud), len(adj_jud)))
    #
    # cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
    #
    # for s in range(len(adj_jud)):
    #     for t in range(len(adj_jud)):
    #         if adj_matrix_bin[s, t] == 1:
    #             node_feat_s = pooled_feat[adj_rois[s], :]
    #             node_feat_t = pooled_feat[adj_rois[t], :]
    #             adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
    #         else:
    #             adj_matrix[s, t] = 0
    #
    # adj_matrix = torch.from_numpy(adj_matrix).float().cuda()
    #
    # pooled_feat[adj_rois, :] = F.relu(self.gcn1(pooled_feat[adj_rois, :], adj_matrix))
    # pooled_feat[adj_rois, :] = F.relu(self.gcn2(pooled_feat[adj_rois, :], adj_matrix))

    # NOTE: N_node comes from the training-only branch above; this path assumes training
    # adj_jud = np.zeros((N_node, N_node))
    adj_matrix = np.zeros((N_node, N_node))
    #
    # for k in range(idx_subgraph):
    #     idx_k = np.transpose(np.argwhere(vertex_subgraph == k))[0]
    #     for s in range(idx_k.shape[0]):
    #         for t in range(idx_k.shape[0]):
    #             if s == t:
    #                 adj_jud[s, t] = 0
    #             else:
    #                 adj_jud[s, t] = 1
    #
    cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
    for s in range(N_node):
        for t in range(N_node):
            # if adj_jud[s,t] != 0:
            if s != t:
                node_feat_s = pooled_feat[s, :]
                node_feat_t = pooled_feat[t, :]
                adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
    adj_matrix = torch.from_numpy(adj_matrix).float().cuda()
    pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
    pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    # update 2019-6-17: fix the bug for dimension specified as 0...
    if self.training:
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
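# The `self.gcn1` / `self.gcn2` layers above consume a dense cosine-similarity adjacency.
# A minimal graph-convolution sketch in the spirit of Kipf & Welling
# (X' = D^-1/2 (A + I) D^-1/2 X W); the layer name and sizes are assumptions, and it
# relies on this file's existing torch / nn imports.
class GraphConvSketch(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weight = nn.Linear(in_dim, out_dim, bias=False)

    def forward(self, x, adj):
        a_hat = adj + torch.eye(adj.size(0), device=adj.device)  # add self-loops
        d_inv_sqrt = a_hat.sum(1).clamp(min=1e-6).pow(-0.5)
        a_norm = d_inv_sqrt.unsqueeze(1) * a_hat * d_inv_sqrt.unsqueeze(0)
        return a_norm @ self.weight(x)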
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    # ========= Union Box ==========
    whole_box = union_box_layer(rois, im_info)
    whole_box = whole_box.reshape(whole_box.shape[0], 1, 5)
    whole = torch.from_numpy(whole_box)
    whole = whole.type(torch.cuda.FloatTensor)
    # whole = whole_box.view([-1, 5])

    # edges = edge_box_layer(rois, im_info)
    # edges = torch.from_numpy(edges)
    # edge = edges.view([-1, 12])
    edges_all = edge_whole_layer(rois, im_info)
    edges_all = torch.from_numpy(edges_all)

    # whole_rois = torch.cat((whole, rois), 1)
    rois = Variable(rois)
    # print rois.size()

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        # NOTE: this branch does not compute whole_pool_feat
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        whole_pool_feat = self.RCNN_roi_align_whole(base_feat, whole.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
        whole_pool_feat = self.RCNN_roi_pool(base_feat, whole.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    whole_pool_feat = self._head_to_tail(whole_pool_feat)

    ########## structure_inference_spmm ##########
    # pooled_feat = structure_inference_spmm(pooled_feat, whole_pool_feat, edges, rois.size()[1])
    pooled_feat = self.Structure_inference(edges_all, pooled_feat, whole_pool_feat, rois.size()[1])
    # print 'pooled_feat.shape: ', pooled_feat.shape
    # print 'rois.shape: ', rois.shape
    # print 'edges.shape: ', edges.shape

    # coordinate = self.coor_fc( rois[:,:,1:].reshape(rois.shape[1], 4) )
    # pooled_feat = torch.cat(( coordinate, pooled_feat), 1)
    # pooled_feat = torch.add(coordinate, pooled_feat)

    # ######### external_dim ###########
    # external_feature = rois[:,:,3:].view([128,2])
    # pooled_feat = self.External(pooled_feat, external_feature)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
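# `union_box_layer` above returns one image-level box enclosing all rois so a global
# "scene" feature can be pooled alongside the per-roi features. A numpy sketch under
# the same (b, n, 5) roi layout assumption (batch_idx, x1, y1, x2, y2); `im_info` is
# kept for signature parity but unused here, since the caller handles clipping:
def union_box_layer_sketch(rois, im_info):
    rois_np = rois.cpu().numpy()
    whole = np.zeros((rois_np.shape[0], 5), dtype=np.float32)
    whole[:, 1] = rois_np[:, :, 1].min(axis=1)
    whole[:, 2] = rois_np[:, :, 2].min(axis=1)
    whole[:, 3] = rois_np[:, :, 3].max(axis=1)
    whole[:, 4] = rois_np[:, :, 4].max(axis=1)
    return whole  # the caller reshapes to (b, 1, 5) and moves it to CUDA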
def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)
    if self.context:
        domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
        if target:
            return domain_p  # , diff
        _, feat = self.netD(base_feat.detach())
    else:
        domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
        if target:
            return domain_p  # , diff

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    if self.context:
        feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat, pooled_feat), 1)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, domain_p  # , diff
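# `grad_reverse` above is the standard gradient reversal layer from Ganin & Lempitsky's
# domain-adversarial training: identity on the forward pass, gradient scaled by -lambda
# on the backward pass. A minimal sketch with modern autograd.Function style:
class GradReverseSketch(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, lambd):
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # reverse and scale the gradient; no gradient w.r.t. lambd
        return grad_output.neg() * ctx.lambd, None

def grad_reverse_sketch(x, lambd=1.0):
    return GradReverseSketch.apply(x, lambd)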
def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # get the multi-hot class vector for the image-level label
    if self.training and target:
        cls_label_ind = torch.unique(gt_boxes[:, :, 4].cpu())
        cls_label = torch.zeros(self.n_classes)
        cls_label[cls_label_ind.long()] = 1
        # assume the background category is always present
        cls_label[0] = 1
        cls_label = cls_label.cuda()
        cls_label.requires_grad = False

    # feed image data to base model to obtain base feature map
    base_feat1 = self.RCNN_base1(im_data)
    if self.lc:
        d_pixel, _ = self.netD_pixel_1(grad_reverse(base_feat1, lambd=eta))
        # print(d_pixel)
        if not target:
            _, feat_pixel = self.netD_pixel_1(base_feat1.detach())
    else:
        d_pixel = self.netD_pixel_1(grad_reverse(base_feat1, lambd=eta))

    base_feat2 = self.RCNN_base2(base_feat1)
    if self.lc:
        d_pixel_2, _ = self.netD_pixel_2(grad_reverse(base_feat2, lambd=eta))
    else:
        d_pixel_2 = self.netD_pixel_2(grad_reverse(base_feat2, lambd=eta))

    base_feat3 = self.RCNN_base3(base_feat2)
    if self.lc:
        d_pixel_3, _ = self.netD_pixel_3(grad_reverse(base_feat3, lambd=eta))
    else:
        d_pixel_3 = self.netD_pixel_3(grad_reverse(base_feat3, lambd=eta))
    # print(d_pixel_3.mean())

    base_feat4 = self.RCNN_base4(base_feat3)
    if self.gc:
        d_pixel_4, _ = self.netD_1(grad_reverse(base_feat4, lambd=eta))
    else:
        d_pixel_4 = self.netD_1(grad_reverse(base_feat4, lambd=eta))
    # something wrong

    base_feat = self.RCNN_base5(base_feat4)
    # for target domain training, we need to return the d_pixel, domain_p
    if self.gc:
        domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
        if target:
            return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p
        _, feat = self.netD(base_feat.detach())
    else:
        domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
        if target:
            return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is training phase, then use ground truth bboxes for refining
    if self.training and not target:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    # feat_pixel = torch.zeros(feat_pixel.size()).cuda()
    if self.lc:
        feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
    if self.gc:
        feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat, pooled_feat), 1)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic and not target:
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    # compute the sum of weakly score
    if False:
        # cls_prob_sum = torch.sum(cls_prob, 0)
        # x = max(1, x)
        # cls_prob_sum = cls_prob_sum.repeat(2, 1)
        # cls_prob_sum = torch.min(cls_prob_sum, 0)[0]
        max_roi_cls_prob = torch.max(cls_prob, 0)[0]
        # assert (max_roi_cls_prob.data.cpu().numpy().all() >= 0. and max_roi_cls_prob.data.cpu().numpy().all() <= 1.)
        if not (max_roi_cls_prob.data.cpu().numpy().all() >= 0.
                and max_roi_cls_prob.data.cpu().numpy().all() <= 1.):
            pdb.set_trace()
        if not (cls_label.data.cpu().numpy().all() >= 0.
                and cls_label.data.cpu().numpy().all() <= 1.):
            pdb.set_trace()
        BCE_loss = F.binary_cross_entropy(max_roi_cls_prob, cls_label)
        return d_pixel, domain_p, BCE_loss

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    # for weakly detection, concentrate the cls_score and calculate the loss
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    # return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p
    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label, d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p  # , diff
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        # rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois, rois_label, rois_scale_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        # EDIT
        rois_scale_label = Variable(rois_scale_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        # EDIT
        rois_scale_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    # note: this variant's rois carry six columns, hence view(-1, 6) below
    # print(rois)
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 6), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 6))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 6))
    # print(pooled_feat.shape)

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    if self.training:
        fg_inds = torch.squeeze((rois_scale_label >= 0).nonzero(), 1)
        # print((rois_scale_label >= 0).nonzero().shape)
        # compute object scale classification probability
        # EDIT
        scale_score = self.RCNN_scale_score(pooled_feat)
        scale_score = scale_score[fg_inds]
        # EDIT
        rois_scale_label = rois_scale_label[fg_inds]
        # print(rois_scale_label)
        # print(scale_score)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    RCNN_loss_scale = 0
    RCNN_loss_scale_adv = 0
    RCNN_acc_scale = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
        # scale classification loss
        # EDIT
        # print(pooled_feat)
        # print(scale_score)
        # print(rois_scale_label)
        RCNN_loss_scale = F.cross_entropy(scale_score, rois_scale_label)
        # RCNN_loss_scale_adv = torch.sum(F.softmax(scale_score, dim=1) * F.log_softmax(torch.clamp(logsoftmax(scale_score), min=1e-10, max=1.0), dim=1))
        softmax = nn.Softmax(dim=1)
        RCNN_loss_scale_adv = torch.mean(
            torch.sum(softmax(scale_score) *
                      torch.log(torch.clamp(softmax(scale_score), min=1e-10, max=1.0)), 1))
        correct = scale_score.max(1)[1].type_as(rois_scale_label).eq(rois_scale_label)
        if not hasattr(correct, 'sum'):
            correct = correct.cpu()
        correct = correct.sum().type(torch.FloatTensor).cuda()
        RCNN_acc_scale = correct / scale_score.size(0)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    # https://github.com/jwyang/faster-rcnn.pytorch/issues/226
    if self.training:
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)
        RCNN_loss_scale = torch.unsqueeze(RCNN_loss_scale, 0)
        RCNN_loss_scale_adv = torch.unsqueeze(RCNN_loss_scale_adv, 0)
        RCNN_acc_scale = torch.unsqueeze(RCNN_acc_scale, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, \
            RCNN_loss_scale, RCNN_loss_scale_adv, RCNN_acc_scale, \
            rois_label

    return rois, cls_prob, bbox_pred, RCNN_acc_scale
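# The scale-adversarial term above is the (negative) entropy of the scale classifier's
# softmax: minimizing it as written maximizes entropy, pushing pooled features to become
# scale-ambiguous. Restated as a standalone helper for clarity; it is equivalent to the
# inline expression, up to the same numerical clamp:
def negative_entropy_sketch(scores):
    p = F.softmax(scores, dim=1)
    return (p * torch.log(p.clamp(min=1e-10, max=1.0))).sum(dim=1).mean()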
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data
    # print('--faster rcnn -- ,im_data = {},im_info = {},gt_boxes = {},num_boxes = {}'.format(
    #     im_data.size(), im_info, gt_boxes, num_boxes))

    # feed image data to base model to obtain base feature map
    base_feat_times = self.RCNN_base(im_data)  # [1, 832, L/4, 7, 7]
    # print('base_feat_times size = {}'.format(base_feat_times.size()))
    if self.base_feature_mean:
        base_feat_key = torch.mean(base_feat_times, 2)
    else:
        base_feat_time_len = base_feat_times.size(2)
        base_feat_key_time = base_feat_time_len // 2
        base_feat_key = base_feat_times[:, :, base_feat_key_time, :, :]
    # base_feat_key_time = 4, base_fear_middle size = torch.Size([1, 832, 7, 7])
    # print('base_feat_times = {} ,base_fear_middle size = {}'.format(base_feat_times.size(), base_feat_key.size()))

    # feed the key-frame base feature map to RPN to obtain rois; the rois have
    # already been refined once inside the proposal layer
    # print('im_info = {},gt_boxes = {},num_boxes = {} '.format(im_info, gt_boxes, num_boxes))
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat_key, im_info, gt_boxes, num_boxes)
    # rois size = test: ([1, 300, 5]), train: [b, 2000, 5]
    # rois[0,0,:] = [0.0000, 190.2723, 39.9991, 208.7142, 102.8349], [0.0000, 222.2723, 167.9991, 240.7142, 230.8349]
    # print('rpn_loss_cls = {}, rpn_loss_cls = {}, rois size = {},rois ={}'.format(
    #     rpn_loss_cls, rpn_loss_cls, rois.size(), rois[0, 1000, :]))

    # need replicating in time dim for rois
    # if it is training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        # train: rois = ([b, 128, 5]), rois_label = [b*128], rois_target size = torch.Size([b*128, 4])
        # print('---RCNN_proposal_target----,rois = {}, rois_label = {}, rois_target size = {},rois_outside_ws ={}'
        #       .format(rois.size(), rois_label.size(), rois_target.size(), rois_outside_ws.size))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)  # [b, max_num, (label,w,h,x,y)], test: ([b, 300, 5]), train: ([b, 128, 5])

    # recycle roi pooling
    # roi_pooled_fs = []
    # for i in range(base_feat_time_len):
    #     pooled_feat = self.RCNN_roi_pool(base_feat_times[:,:,i,:,:], rois.view(-1, 5))
    #     # print('pooled_feat size = {}'.format(pooled_feat.size()))
    #     roi_pooled_fs.append(pooled_feat)
    # # print('roi_pooled_fs size = {}'.format(len(roi_pooled_fs)))
    if self.is_pool:
        pooled_feat_0 = self.RCNN_roi_pool(
            base_feat_times[:, :, 0, :, :],
            rois.view(-1, 5))  # [b*num, c, w, h] = test: ([300, 832, 7, 7]), train: [b*128, 832, 7, 7]
        pooled_feat_0 = torch.unsqueeze(pooled_feat_0, 2)
        pooled_feat_1 = torch.unsqueeze(
            self.RCNN_roi_pool(base_feat_times[:, :, 1, :, :], rois.view(-1, 5)), 2)
        pooled_feat_2 = torch.unsqueeze(
            self.RCNN_roi_pool(base_feat_times[:, :, 2, :, :], rois.view(-1, 5)), 2)
        pooled_feat_3 = torch.unsqueeze(
            self.RCNN_roi_pool(base_feat_times[:, :, 3, :, :], rois.view(-1, 5)), 2)
        # pooled_feat_4 = torch.unsqueeze(self.RCNN_roi_pool(base_feat_times[:, :, 4, :, :], rois.view(-1, 5)), 2)
        # pooled_feat_5 = torch.unsqueeze(self.RCNN_roi_pool(base_feat_times[:, :, 5, :, :], rois.view(-1, 5)), 2)
        # pooled_feat_6 = torch.unsqueeze(self.RCNN_roi_pool(base_feat_times[:, :, 6, :, :], rois.view(-1, 5)), 2)
        # pooled_feat_7 = torch.unsqueeze(self.RCNN_roi_pool(base_feat_times[:, :, 7, :, :], rois.view(-1, 5)), 2)
        # print('pooled_feat7 size = {},pooled_feat0 size = {}'.format(pooled_feat_7.size(), pooled_feat_0.size()))
    else:
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat_key.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat_0 = F.max_pool2d(
            self.RCNN_roi_crop(base_feat_times[:, :, 0, :, :], Variable(grid_yx).detach()), 2, 2)
        pooled_feat_0 = torch.unsqueeze(pooled_feat_0, 2)
        pooled_feat_1 = F.max_pool2d(
            self.RCNN_roi_crop(base_feat_times[:, :, 1, :, :], Variable(grid_yx).detach()), 2, 2)
        pooled_feat_1 = torch.unsqueeze(pooled_feat_1, 2)
        pooled_feat_2 = F.max_pool2d(
            self.RCNN_roi_crop(base_feat_times[:, :, 2, :, :], Variable(grid_yx).detach()), 2, 2)
        pooled_feat_2 = torch.unsqueeze(pooled_feat_2, 2)
        pooled_feat_3 = F.max_pool2d(
            self.RCNN_roi_crop(base_feat_times[:, :, 3, :, :], Variable(grid_yx).detach()), 2, 2)
        pooled_feat_3 = torch.unsqueeze(pooled_feat_3, 2)

    # test: ([b*300, 832, 4, 7, 7]), train: [b*128, 832, 4, 7, 7]
    pooled_feat_cat = torch.cat(
        [pooled_feat_0, pooled_feat_1, pooled_feat_2, pooled_feat_3],
        2)  # , pooled_feat_4, pooled_feat_5, pooled_feat_6, pooled_feat_7], 2)
    # print('pooled_feat0 size = {} , pooled_feat_cat size = {}'.format(pooled_feat_0.size(), pooled_feat_cat.size()))

    # test: ([b * 300, 1024]), train: [b*128, 1024]
    pooled_feat = self._head_to_tail(pooled_feat_cat)
    # print('after top pooled_feat size = {}'.format(pooled_feat.size()))

    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    # print('bbox_pred size = {}'.format(bbox_pred.size()))
    cls_score = self.RCNN_cls_score(pooled_feat)
    # print('cls_score size = {}'.format(cls_score.size()))
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
    # print('cls_score = {}, bbox_pred = {}'.format(cls_prob, bbox_pred))

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
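# The four per-frame poolings above could equivalently be written as a loop over the
# time dimension, which also generalizes to 8 frames without the commented-out lines.
# A sketch assuming the `is_pool` path and the same (b*n, c, 7, 7) per-frame output;
# `roi_pool` stands in for self.RCNN_roi_pool:
def roi_pool_over_time_sketch(roi_pool, base_feat_times, rois_flat):
    # pool each frame, then stack along a new time axis: (b*n, c, T, 7, 7)
    pooled = [roi_pool(base_feat_times[:, :, t, :, :], rois_flat)
              for t in range(base_feat_times.size(2))]
    return torch.stack(pooled, dim=2)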
def forward(self, im_data, im_info, gt_boxes, num_boxes, t_rois=None):
    batch_size = im_data.size(0)

    base_feat = self.RCNN_base(im_data)
    if t_rois is None:
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)
    else:
        rois = t_rois

    # if it is training phase, then use ground truth bboxes for refining
    if self.training and t_rois is None:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat_post = self._head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat_post)
    cls_score = self.RCNN_cls_score(pooled_feat_post)
    cls_prob = F.softmax(cls_score, 1)

    if t_rois is not None:
        return pooled_feat_post, cls_prob

    if self.training and not self.class_agnostic and t_rois is None:
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    if self.training:
        return rois_label, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox
    else:
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
def forward(self, im_data, im_info, gt_boxes, num_boxes, domain=None, l=0, loss_start=False):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes, domain, self.transfer)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws, domain_label = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # ----------------------- transfer learning ----------------------------#
    # print(domain)
    dom_loss = 0
    # baseline: transfer == False
    if self.training and self.transfer:
        if self.grl:
            dom_input = ReverseLayerF.apply(pooled_feat, l)
        else:
            dom_input = pooled_feat
        dom_pred = self._domain_classify(dom_input)
        domain_label = Variable(domain_label.cuda().view(-1).long())

        ############ Process Transfer Loss Weight #########
        if loss_start:
            # dim=1 made explicit (the flattened source relied on the old implicit default)
            p_target = F.softmax(dom_pred * self.transfer_gamma, dim=1)[:, 0]
            domain_label.data = domain_label.data.type(torch.FloatTensor).cuda()
            l_target = domain_label
            self.weight = p_target**l_target
        ###############################################
        ############## DOMAIN LOSS SELECTION ##########
        else:
            ids = torch.LongTensor(1).cuda()
            # random selection
            if self.transfer_select == 'RANDOM':
                perm = torch.randperm(rois.size(1))
                ids = perm[:rois.size(1) // 8].cuda()  # integer division (py3)
            # select positive samples and predicted positive samples
            elif self.transfer_select == 'CONDITION':
                # torch.range is deprecated and / is float division under py3;
                # arange(n // 8) covers the same 0 .. n/8-1 index range
                ids = torch.arange(rois.size(1) // 8)
                ids = ids.long().cuda()
            # select all positive samples
            elif self.transfer_select == 'POSITIVE':
                ids = torch.nonzero(rois_label.data)
                ids = torch.squeeze(ids).cuda()
            # select a balanced set of positive and negative samples
            elif self.transfer_select == 'BALANCE':
                ids_p = torch.nonzero(rois_label.data)
                ids_p = torch.squeeze(ids_p).cuda()
                ids_n = (rois_label.data == 0).nonzero()
                ids_n = torch.squeeze(ids_n).cuda()
                ids_n = ids_n[:ids_p.size(0)]
                ids = torch.cat((ids_p, ids_n), 0).cuda()
            # select all samples
            if self.transfer_select == 'ALL':
                dom_pred_loss = dom_pred
                dom_label_loss = domain_label
            else:
                dom_pred_loss = dom_pred[ids]
                dom_label_loss = domain_label[ids]
            ########## DOMAIN LOSS SELECTION DONE ##########
            dom_loss = F.cross_entropy(dom_pred_loss, dom_label_loss)
            dom_loss = dom_loss * (self.transfer_weight.expand_as(dom_loss))
    # --------------------- transfer learning done -------------------------#

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        if self.transfer and loss_start:
            rois_label_loss = torch.eye(self.n_classes)[rois_label.data.cpu()].type(torch.FloatTensor)
            rois_label_loss = Variable(rois_label_loss.cuda())
            weight_loss_cls = self.weight.view(rois_label.size(0), 1).repeat(1, self.n_classes)
            RCNN_loss_cls = F.binary_cross_entropy_with_logits(cls_score, rois_label_loss, weight_loss_cls)
            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws,
                                             rois_outside_ws, True, True, self.weight)
        else:
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, dom_loss
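# `ReverseLayerF` above plays the same role as `grad_reverse` earlier in this file,
# with the reversal strength `l` passed per call. A minimal sketch mirroring the
# earlier gradient-reversal example; the class name is illustrative:
class ReverseLayerFSketch(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, l):
        ctx.l = l
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # reverse and scale the gradient; no gradient w.r.t. l
        return grad_output.neg() * ctx.l, None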
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data
    self.batch_size = im_data.size(0)

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is training phase, then use ground truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # Base feature
    base_feat = self.RCNN_conv_new(base_feat)

    # Local feature with PS-ROIPooling
    # Local classification
    local_cls_feat = self.RCNN_local_cls_base(base_feat)
    local_cls_feat = self.RCNN_psroi_pool_cls(local_cls_feat, rois.view(-1, 5))
    local_cls = self.avg_pooling(local_cls_feat)
    local_cls = self.RCNN_local_cls_fc(local_cls)

    # Local bbox regression
    local_bbox_feat = self.RCNN_local_bbox_base(base_feat)
    local_bbox_feat = self.RCNN_psroi_pool_loc(local_bbox_feat, rois.view(-1, 5))
    local_bbox = self.avg_pooling(local_bbox_feat)

    # Global feature with ROIPooling
    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    global_base = self.RCNN_global_base(pooled_feat)
    global_cls = self.RCNN_global_cls(global_base)
    global_bbox = self.RCNN_global_bbox(global_base)

    # fuse the global and local features
    cls_score = (local_cls + global_cls).squeeze()
    bbox_pred = (local_bbox + global_bbox).squeeze()

    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    cls_prob = F.softmax(cls_score, dim=1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        loss_func = self.ohem_detect_loss if cfg.TRAIN.OHEM else self.detect_loss
        RCNN_loss_cls, RCNN_loss_bbox = loss_func(cls_score, rois_label, bbox_pred,
                                                  rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
def forward(self, im_data, im_info, gt_boxes, num_boxes, return_feats=False, oracle_rois=None): """ :param im_data: :param im_info: :param gt_boxes: :param num_boxes: :param return_feats: :param oracle_rois: Use GT ROIs for feature extraction (NOT SUPPORTED DURING TRAINING!!!) :return: """ if self.training and oracle_rois is not None: raise NotImplementedError( "We do not support using oracle ROIs during training phase.") batch_size = im_data.size(0) im_info = im_info.data gt_boxes = gt_boxes.data num_boxes = num_boxes.data # feed image data to base model to obtain base feature map base_feat = self.RCNN_base(im_data) if not self.printed: print("base_feat: {}".format(base_feat.shape)) # feed base feature map to the RPN to obtain rois rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn( base_feat, im_info, gt_boxes, num_boxes) if not self.printed: print("type of rois: {}".format(type(rois))) print("rois: {}".format(rois.shape)) # 1 X num objects X 5 # if it is the training phase, use ground-truth bboxes for refining if self.training: roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes) rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data rois_label = Variable(rois_label.view(-1).long()) rois_target = Variable(rois_target.view(-1, rois_target.size(2))) rois_inside_ws = Variable( rois_inside_ws.view(-1, rois_inside_ws.size(2))) rois_outside_ws = Variable( rois_outside_ws.view(-1, rois_outside_ws.size(2))) else: rois_label = None rois_target = None rois_inside_ws = None rois_outside_ws = None rpn_loss_cls = 0 rpn_loss_bbox = 0 if oracle_rois is not None: rois = torch.from_numpy(oracle_rois).float() rois = torch.unsqueeze(rois, dim=0) if not self.printed: print("rois.Variable.shape: {}".format(rois.shape)) print("rois: {}".format(rois)) rois = Variable(rois).cuda() # do roi pooling based on predicted rois if cfg.POOLING_MODE == 'crop': # pdb.set_trace() # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5)) grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size) grid_yx = torch.stack( [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous() pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat = F.max_pool2d(pooled_feat, 2, 2) elif cfg.POOLING_MODE == 'align': pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5)) if not self.printed: print("pooled_feat.shape: {}".format(pooled_feat.shape)) # feed pooled features to top model pooled_feat = self._head_to_tail(pooled_feat) # compute bbox offset bbox_pred = self.RCNN_bbox_pred(pooled_feat) if self.training and not self.class_agnostic: # select the corresponding columns according to roi labels bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4) bbox_pred_select = torch.gather( bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4)) bbox_pred = bbox_pred_select.squeeze(1) # compute object classification probability cls_score = self.RCNN_cls_score(pooled_feat) cls_prob = F.softmax(cls_score, dim=1) RCNN_loss_cls = 0 RCNN_loss_bbox = 0 if self.training: # classification loss RCNN_loss_cls = F.cross_entropy(cls_score, rois_label) # bounding box regression L1 loss RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws) cls_prob = cls_prob.view(batch_size, rois.size(1), -1) bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) self.printed = True if not return_feats: return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label else: return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, pooled_feat
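The oracle_rois path above expects boxes already in the 5-column layout consumed by the pooling layers, with the batch index in column 0. A sketch of building that layout from plain (N, 4) ground-truth boxes for a single image (the helper name is illustrative):

import numpy as np
import torch

def pack_oracle_rois(boxes_xyxy):
    # (N, 4) numpy boxes -> (1, N, 5) ROI tensor; column 0 holds the
    # batch index (0 for a single image), matching rois.view(-1, 5).
    rois = np.zeros((boxes_xyxy.shape[0], 5), dtype=np.float32)
    rois[:, 1:5] = boxes_xyxy
    return torch.from_numpy(rois).unsqueeze(0)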
def forward(self, im_data, im_info, gt_boxes, num_boxes): batch_size = im_data.size(0) im_info = im_info.data gt_boxes = gt_boxes.data num_boxes = num_boxes.data rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label = self.FRCN(im_data, im_info, gt_boxes, num_boxes) # get global and local region from Faster R-CNN base_feat = self.FRCN.RCNN_base(im_data) #print(rois.data.cpu().numpy()) scores = cls_prob.data boxes = rois.data[:, :, 1:5] box_deltas = self.FRCN._bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: if self.class_agnostic: if self.use_cuda > 0: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda( ) + torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_STDS) + torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_MEANS) box_deltas = box_deltas.view(1, -1, 4) else: if self.use_cuda > 0: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda( ) + torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_STDS) + torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_MEANS) box_deltas = box_deltas.view(1, -1, 4 * len(self.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() # get global region thresh = 0.01 region_g = np.ndarray((0, 5)) region_l = np.ndarray((0, 5)) for j in range(1, 4): inds = torch.nonzero(scores[:, j] >= thresh).view(-1) inds_l = torch.nonzero(scores[:, j + 3] >= thresh).view(-1) #print(inds) if inds.numel() > 0 and inds_l.numel() > 0: cls_scores = scores[:, j][inds] cls_scores_l = scores[:, j + 3][inds_l] #print(cls_scores) #print(cls_scores_l) _, order = torch.sort(cls_scores, 0, True) _, order_l = torch.sort(cls_scores_l, 0, True) if self.class_agnostic: cls_boxes = pred_boxes[inds] cls_boxes_l = pred_boxes[inds_l] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_boxes_l = pred_boxes[inds_l][:, (j + 3) * 4:(j + 4) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) cls_dets_l = torch.cat( (cls_boxes_l, cls_scores_l.unsqueeze(1)), 1) cls_dets = cls_dets[order] cls_dets_l = cls_dets_l[order_l] region_g = np.vstack((region_g, cls_dets)) region_l = np.vstack((region_l, cls_dets_l)) """ keep = nms(cls_dets, 0.9, force_cpu=not cfg.USE_GPU_NMS) cls_dets = cls_dets[keep.view(-1).long()] keep = nms(cls_dets_l, 0.9, force_cpu=not cfg.USE_GPU_NMS) cls_dets_l = cls_dets_l[keep.view(-1).long()] cls_dets = cls_dets[order] cls_dets_l = cls_dets_l[order_l] sort_ind = np.argsort(cls_dets[...,-1]) high_ind = sort_ind[-self.minibatch//2:] low_ind = sort_ind[:self.minibatch//2] region_g = np.vstack((region_g,
cls_dets[high_ind])) region_g = np.vstack((region_g, cls_dets[low_ind])) sort_ind = np.argsort(cls_dets_l[..., -1]) high_ind = sort_ind[-self.minibatch//2:] low_ind = sort_ind[:self.minibatch//2] region_l = np.vstack((region_l, cls_dets_l[high_ind])) region_l = np.vstack((region_l, cls_dets_l[low_ind])) """ #region_g = np.vstack((region_g, cls_dets[np.argmax(cls_dets[..., -1])])) #region_l = np.vstack((region_l, cls_dets_l[np.argmax(cls_dets_l[..., -1])])) if not self.training: self.minibatch = 1 if self.training: keep = nms(torch.tensor(region_g).cuda(), 0.9, force_cpu=not cfg.USE_GPU_NMS) if type(keep) is not list: keep = keep.view(-1).long() region_g = region_g[keep] sort_ind = np.argsort(region_g[..., -1]) high_ind_g = sort_ind[-self.minibatch // 2:] low_ind_g = sort_ind[:self.minibatch // 2] keep = nms(torch.tensor(region_l).cuda(), 0.9, force_cpu=not cfg.USE_GPU_NMS) if type(keep) is not list: keep = keep.view(-1).long() region_l = region_l[keep] sort_ind = np.argsort(region_l[..., -1]) high_ind_l = sort_ind[-self.minibatch // 2:] low_ind_l = sort_ind[:self.minibatch // 2] high_num = min(len(high_ind_g), len(high_ind_l)) high_ind_g = high_ind_g[:high_num] high_ind_l = high_ind_l[:high_num] low_num = min(len(low_ind_g), len(low_ind_l)) low_ind_g = low_ind_g[:low_num] low_ind_l = low_ind_l[:low_num] proposal_g = np.vstack((region_g[high_ind_g], region_g[low_ind_g])) proposal_l = np.vstack((region_l[high_ind_l], region_l[low_ind_l])) #self.proposal_g.data.resize_(proposal_g.size()).copy_(proposal_g) #self.proposal_l.data.resize_(proposal_l.size()).copy_(proposal_l) gt_boxes = gt_boxes.cpu().numpy()[0, :2] gt_g = gt_boxes[np.where(gt_boxes[..., -1] < 4)[0]] gt_l = gt_boxes[np.where(gt_boxes[..., -1] >= 4)[0]] # compute paired ground truth def compute_iou(ps, gt, th=0.5): iou_x1 = np.maximum(ps[..., 0], gt[0]) iou_y1 = np.maximum(ps[..., 1], gt[1]) iou_x2 = np.minimum(ps[..., 2], gt[2]) iou_y2 = np.minimum(ps[..., 3], gt[3]) iou_w = np.maximum(iou_x2 - iou_x1, 0) iou_h = np.maximum(iou_y2 - iou_y1, 0) iou_area = iou_w * iou_h gt_area = (gt[2] - gt[0]) * (gt[3] - gt[1]) p_area = (ps[..., 2] - ps[..., 0]) * (ps[..., 3] - ps[..., 1]) overlap = iou_area / (gt_area + p_area - iou_area) count = np.zeros((ps.shape[0]), dtype=int) count[overlap >= self.gt_iou] += 1 return count cou = compute_iou(proposal_g, gt_g[0]) + compute_iou( proposal_l, gt_l[0]) ## 2019.2.13 #glcc_gt = np.zeros((proposal_g.shape[0]), dtype=int) #glcc_gt[cou==2] = gt_g[0,-1] glcc_gt = np.array([gt_g[0, -1]], dtype=int) glcc_gt = torch.tensor(glcc_gt, dtype=torch.long).cuda() self.glcc_gt.data.resize_(glcc_gt.size()).copy_(glcc_gt) else: # test phase proposal_g = region_g[np.argmax(region_g[..., -1])][None, ...] proposal_l = region_l[np.argmax(region_l[..., -1])][None, ...]
#self.proposal_g.data.resize_(proposal_g.size()).copy_(proposal_g.size()) #self.proposal_l.data.resize_(proposal_l.size()).copy_(proposal_l.size()) # if true, then show detection global and local region if False: gt_boxes = gt_boxes.astype(np.int) im = im_data.cpu().numpy()[0] im = np.transpose(im, (1, 2, 0))[..., ::-1] im -= im.min() im /= im.max() plt.imshow(im.astype(np.float)) ax = plt.axes() ax.add_patch( plt.Rectangle((region_g[0, 0], region_g[0, 1]), region_g[0, 2] - region_g[0, 0], region_g[0, 3] - region_g[0, 1], fill=False, edgecolor='red', linewidth=1)) ax.add_patch( plt.Rectangle((region_l[0, 0], region_l[0, 1]), region_l[0, 2] - region_l[0, 0], region_l[0, 3] - region_l[0, 1], fill=False, edgecolor='yellow', linewidth=1)) ax.add_patch( plt.Rectangle((gt_boxes[0, 0], gt_boxes[0, 1]), gt_boxes[0, 2] - gt_boxes[0, 0], gt_boxes[0, 3] - gt_boxes[0, 1], fill=False, edgecolor='green', linewidth=1)) ax.add_patch( plt.Rectangle((gt_boxes[1, 0], gt_boxes[1, 1]), gt_boxes[1, 2] - gt_boxes[1, 0], gt_boxes[1, 3] - gt_boxes[1, 1], fill=False, edgecolor='white', linewidth=1)) plt.show() rois_g = np.zeros((1, proposal_g.shape[0], 5), dtype=np.float32) rois_g[0, :, 1:5] = proposal_g[:, :4] #rois_g /= 16. rois_l = np.zeros((1, proposal_l.shape[0], 5), dtype=np.float32) rois_l[0, :, 1:5] = proposal_l[:, :4] #rois_l /= 16. rois_g = torch.tensor(rois_g, dtype=torch.float).cuda() rois_l = torch.tensor(rois_l, dtype=torch.float).cuda() self.rois_g.data.resize_(rois_g.size()).copy_(rois_g) self.rois_l.data.resize_(rois_l.size()).copy_(rois_l) # global region if cfg.POOLING_MODE == 'crop': grid_xy = _affine_grid_gen(self.rois_g.view(-1, 5), base_feat.size()[2:], self.FRCN.grid_size) grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]], 3).contiguous() pooled_feat_g = self.FRCN.RCNN_roi_crop(base_feat, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat_g = F.max_pool2d(pooled_feat_g, 2, 2) elif cfg.POOLING_MODE == 'align': pooled_feat_g = self.FRCN.RCNN_roi_align(base_feat, self.rois_g.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat_g = self.FRCN.RCNN_roi_pool(base_feat, self.rois_g.view(-1, 5)) # local region if cfg.POOLING_MODE == 'crop': grid_xy = _affine_grid_gen(self.rois_l.view(-1, 5), base_feat.size()[2:], self.FRCN.grid_size) grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]], 3).contiguous() pooled_feat_l = self.FRCN.RCNN_roi_crop(base_feat, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat_l = F.max_pool2d(pooled_feat_l, 2, 2) elif cfg.POOLING_MODE == 'align': pooled_feat_l = self.FRCN.RCNN_roi_align(base_feat, self.rois_l.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat_l = self.FRCN.RCNN_roi_pool(base_feat, self.rois_l.view(-1, 5)) #print(pooled_feat_g.cpu().detach().numpy().shape) x = torch.cat((pooled_feat_g, pooled_feat_l), dim=1) #print(x.cpu().detach().numpy().shape) x = self.glcc_conv1(x) x = F.relu(x) x = x.view(-1, self.roipool * self.roipool * 512) x = self.glcc_fc1(x) x = F.relu(x) x = nn.Dropout()(x) x = self.glcc_fc2(x) x = F.relu(x) x = nn.Dropout()(x) glcc_out = self.glcc_fc_out(x) if self.training: glcc_gt = torch.tensor(glcc_gt, dtype=torch.long).cuda() glcc_loss = F.cross_entropy(glcc_out, self.glcc_gt) else: glcc_loss = 0. return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, glcc_out, glcc_loss
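During training, the GLCC branch above builds its proposal minibatch from the highest- and lowest-scored regions that survive NMS. The sort-and-stack pattern, factored out for reference (equivalent to the high_ind/low_ind code above):

import numpy as np

def split_high_low(dets, minibatch):
    # dets: (N, 5) rows of [x1, y1, x2, y2, score].
    order = np.argsort(dets[:, -1])
    high = dets[order[-minibatch // 2:]]  # top-scored half
    low = dets[order[:minibatch // 2]]    # bottom-scored half
    return np.vstack((high, low))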
def forward( self, im_data, im_info, im_cls_lb, gt_boxes, num_boxes, need_backprop, tgt_im_data, tgt_im_info, tgt_gt_boxes, tgt_num_boxes, tgt_need_backprop, ): if not (need_backprop.detach() == 1 and tgt_need_backprop.detach() == 0): need_backprop = torch.Tensor([1]).cuda() tgt_need_backprop = torch.Tensor([0]).cuda() assert need_backprop.detach() == 1 and tgt_need_backprop.detach() == 0 batch_size = im_data.size(0) im_info = im_info.data # (size1,size2, image ratio(new image / source image) ) im_cls_lb = im_cls_lb.data gt_boxes = gt_boxes.data num_boxes = num_boxes.data need_backprop = need_backprop.data # feed image data to base model to obtain base feature map base_feat = self.RCNN_base(im_data) cls_feat = self.conv_lst(self.avg_pool(base_feat)).squeeze(-1).squeeze(-1) img_cls_loss = nn.BCEWithLogitsLoss()(cls_feat, im_cls_lb) # feed base feature map tp RPN to obtain rois self.RCNN_rpn.train() rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn( base_feat, im_info, gt_boxes, num_boxes ) # if it is training phrase, then use ground trubut bboxes for refining if self.training: roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes) rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data rois_label = Variable(rois_label.view(-1).long()) rois_target = Variable(rois_target.view(-1, rois_target.size(2))) rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2))) rois_outside_ws = Variable( rois_outside_ws.view(-1, rois_outside_ws.size(2)) ) else: rois_label = None rois_target = None rois_inside_ws = None rois_outside_ws = None rpn_loss_cls = 0 rpn_loss_bbox = 0 rois = Variable(rois) # do roi pooling based on predicted rois if cfg.POOLING_MODE == "align": pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5)) elif cfg.POOLING_MODE == "pool": pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5)) # feed pooled features to top model pooled_feat = self._head_to_tail(pooled_feat) # compute bbox offset bbox_pred = self.RCNN_bbox_pred(pooled_feat) if self.training and not self.class_agnostic: # select the corresponding columns according to roi labels bbox_pred_view = bbox_pred.view( bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4 ) bbox_pred_select = torch.gather( bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand( rois_label.size(0), 1, 4 ), ) bbox_pred = bbox_pred_select.squeeze(1) # compute object classification probability cls_score = self.RCNN_cls_score(pooled_feat) cls_prob = F.softmax(cls_score, 1) RCNN_loss_cls = 0 RCNN_loss_bbox = 0 if self.training: # classification loss RCNN_loss_cls = F.cross_entropy(cls_score, rois_label) # bounding box regression L1 loss RCNN_loss_bbox = _smooth_l1_loss( bbox_pred, rois_target, rois_inside_ws, rois_outside_ws ) cls_prob = cls_prob.view(batch_size, rois.size(1), -1) bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) """ =================== for target ==========================""" tgt_batch_size = tgt_im_data.size(0) tgt_im_info = ( tgt_im_info.data ) # (size1,size2, image ratio(new image / source image) ) tgt_gt_boxes = tgt_gt_boxes.data tgt_num_boxes = tgt_num_boxes.data tgt_need_backprop = tgt_need_backprop.data # feed image data to base model to obtain base feature map tgt_base_feat = self.RCNN_base(tgt_im_data) # feed base feature map tp RPN to obtain rois self.RCNN_rpn.eval() tgt_rois, tgt_rpn_loss_cls, tgt_rpn_loss_bbox = self.RCNN_rpn( tgt_base_feat, tgt_im_info, tgt_gt_boxes, tgt_num_boxes ) # if it is training phrase, then use ground trubut bboxes for refining 
tgt_rois_label = None tgt_rois_target = None tgt_rois_inside_ws = None tgt_rois_outside_ws = None tgt_rpn_loss_cls = 0 tgt_rpn_loss_bbox = 0 tgt_rois = Variable(tgt_rois) # do roi pooling based on predicted rois if cfg.POOLING_MODE == "crop": # pdb.set_trace() # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5)) tgt_grid_xy = _affine_grid_gen( tgt_rois.view(-1, 5), tgt_base_feat.size()[2:], self.grid_size ) tgt_grid_yx = torch.stack( [tgt_grid_xy.data[:, :, :, 1], tgt_grid_xy.data[:, :, :, 0]], 3 ).contiguous() tgt_pooled_feat = self.RCNN_roi_crop( tgt_base_feat, Variable(tgt_grid_yx).detach() ) if cfg.CROP_RESIZE_WITH_MAX_POOL: tgt_pooled_feat = F.max_pool2d(tgt_pooled_feat, 2, 2) elif cfg.POOLING_MODE == "align": tgt_pooled_feat = self.RCNN_roi_align(tgt_base_feat, tgt_rois.view(-1, 5)) elif cfg.POOLING_MODE == "pool": tgt_pooled_feat = self.RCNN_roi_pool(tgt_base_feat, tgt_rois.view(-1, 5)) # feed pooled features to top model tgt_pooled_feat = self._head_to_tail(tgt_pooled_feat) if tgt_pooled_feat.shape[0] > pooled_feat.shape[0]: tgt_pooled_feat = tgt_pooled_feat[: pooled_feat.shape[0]] """ DA loss """ # DA LOSS DA_img_loss_cls = 0 DA_ins_loss_cls = 0 tgt_DA_img_loss_cls = 0 tgt_DA_ins_loss_cls = 0 base_score, base_label = self.RCNN_imageDA(base_feat, need_backprop) # Image DA base_prob = F.log_softmax(base_score, dim=1) DA_img_loss_cls = F.nll_loss(base_prob, base_label) instance_sigmoid, same_size_label = self.RCNN_instanceDA( pooled_feat, need_backprop ) instance_loss = nn.BCELoss() DA_ins_loss_cls = instance_loss(instance_sigmoid, same_size_label) # consistency_prob = torch.max(F.softmax(base_score, dim=1),dim=1)[0] consistency_prob = F.softmax(base_score, dim=1)[:, 1, :, :] consistency_prob = torch.mean(consistency_prob) consistency_prob = consistency_prob.repeat(instance_sigmoid.size()) DA_cst_loss = self.consistency_loss(instance_sigmoid, consistency_prob.detach()) """ ************** taget loss **************** """ tgt_base_score, tgt_base_label = self.RCNN_imageDA( tgt_base_feat, tgt_need_backprop ) # Image DA tgt_base_prob = F.log_softmax(tgt_base_score, dim=1) tgt_DA_img_loss_cls = F.nll_loss(tgt_base_prob, tgt_base_label) tgt_instance_sigmoid, tgt_same_size_label = self.RCNN_instanceDA( tgt_pooled_feat, tgt_need_backprop ) tgt_instance_loss = nn.BCELoss() tgt_DA_ins_loss_cls = tgt_instance_loss( tgt_instance_sigmoid, tgt_same_size_label ) tgt_consistency_prob = F.softmax(tgt_base_score, dim=1)[:, 0, :, :] tgt_consistency_prob = torch.mean(tgt_consistency_prob) tgt_consistency_prob = tgt_consistency_prob.repeat(tgt_instance_sigmoid.size()) tgt_DA_cst_loss = self.consistency_loss( tgt_instance_sigmoid, tgt_consistency_prob.detach() ) return ( rois, cls_prob, bbox_pred, img_cls_loss, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, DA_img_loss_cls, DA_ins_loss_cls, tgt_DA_img_loss_cls, tgt_DA_ins_loss_cls, DA_cst_loss, tgt_DA_cst_loss, )
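self.consistency_loss is defined elsewhere in the model; in Domain Adaptive Faster R-CNN the consistency regularizer is an L2 penalty tying the image-level domain probability (averaged over the feature map) to every instance-level domain prediction, which is what the repeat/detach pattern above sets up. A sketch assuming an MSE criterion:

import torch.nn as nn
import torch.nn.functional as F

def da_consistency(base_score, instance_sigmoid):
    # Image-level source-domain probability, averaged over H x W.
    img_prob = F.softmax(base_score, dim=1)[:, 1, :, :].mean()
    # One copy per instance; detaching the image-level side means only
    # the instance-level domain classifier is pulled toward agreement.
    target = img_prob.expand_as(instance_sigmoid)
    return nn.MSELoss()(instance_sigmoid, target.detach())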
def forward(self, im_data, im_info, gt_boxes, num_boxes): batch_size = im_data.size(0) im_info = im_info.data gt_boxes = gt_boxes.data num_boxes = num_boxes.data # feed image data to base model to obtain base feature map # base_feat = self.RCNN_base(im_data) base_feat_conv3 = self.RCNN_base(im_data) base_feat_conv4 = self.RCNN_conv4(base_feat_conv3) base_feat_conv5 = self.RCNN_conv5(base_feat_conv4) ############ x_o3 = self.conv3(base_feat_conv3) x_o4 = self.conv4(base_feat_conv4) x_o5 = self.conv5(base_feat_conv5) x_o = x_o3 + x_o4 + x_o5 x_e1 = self.e1(x_o) x_e2 = self.e2(x_e1) x = self.e3(x_e2) x = self.d1_deconv(x, output_size=x_e2.size()) x = self.d1(x) x = self.d2_deconv(torch.cat([x_e2, x], 1), output_size=base_feat_conv5.size()) x = self.d2(x) x = self.d3(torch.cat([x_e1, x], 1)) base_feat = self.d4(x) ############# # feed base feature map to the RPN to obtain rois rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn( base_feat, im_info, gt_boxes, num_boxes) # if it is the training phase, use ground-truth bboxes for refining if self.training: roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes) rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data rois_label = Variable(rois_label.view(-1).long()) rois_target = Variable(rois_target.view(-1, rois_target.size(2))) rois_inside_ws = Variable( rois_inside_ws.view(-1, rois_inside_ws.size(2))) rois_outside_ws = Variable( rois_outside_ws.view(-1, rois_outside_ws.size(2))) else: rois_label = None rois_target = None rois_inside_ws = None rois_outside_ws = None rpn_loss_cls = 0 rpn_loss_bbox = 0 rois = Variable(rois) # do roi pooling based on predicted rois if cfg.POOLING_MODE == 'crop': # pdb.set_trace() # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5)) grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size) grid_yx = torch.stack( [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous() pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat = F.max_pool2d(pooled_feat, 2, 2) elif cfg.POOLING_MODE == 'align': pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5)) # feed pooled features to top model pooled_feat = self._head_to_tail(pooled_feat) # compute bbox offset bbox_pred = self.RCNN_bbox_pred(pooled_feat) if self.training and not self.class_agnostic: # select the corresponding columns according to roi labels bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4) bbox_pred_select = torch.gather( bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4)) bbox_pred = bbox_pred_select.squeeze(1) # compute object classification probability cls_score = self.RCNN_cls_score(pooled_feat) cls_prob = F.softmax(cls_score, dim=1) RCNN_loss_cls = 0 RCNN_loss_bbox = 0 if self.training: # classification loss RCNN_loss_cls = F.cross_entropy(cls_score, rois_label) # bounding box regression L1 loss RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws) cls_prob = cls_prob.view(batch_size, rois.size(1), -1) bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
def forward(self, im_data, im_info, gt_boxes, num_boxes, need_backprop=None, dc_label=None): batch_size = im_data.size(0) im_info = im_info.data gt_boxes = gt_boxes.data num_boxes = num_boxes.data # feed image data to base model to obtain base feature map base_feat = self.RCNN_base(im_data) if self.training: # target:0 / source:1 / distorted:2 DA_loss_img = 0.1 * self.Dis(GradReverse.apply(base_feat), dc_label) else: DA_loss_img = 0 # feed base feature map to the RPN to obtain rois rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes, need_backprop) # if it is the training phase, use ground-truth bboxes for refining if self.training: if need_backprop.numpy(): roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes) rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data # print(rois_label) rois_label = Variable(rois_label.view(-1).long()) rois_target = Variable(rois_target.view(-1, rois_target.size(2))) rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2))) rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2))) else: rois_label = None rois_target = None rois_inside_ws = None rois_outside_ws = None rpn_loss_cls = 0 rpn_loss_bbox = 0 else: rois_label = None rois_target = None rois_inside_ws = None rois_outside_ws = None rpn_loss_cls = 0 rpn_loss_bbox = 0 rois = Variable(rois) # do roi pooling based on predicted rois if cfg.POOLING_MODE == 'crop': # pdb.set_trace() # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5)) grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size) grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous() pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat = F.max_pool2d(pooled_feat, 2, 2) elif cfg.POOLING_MODE == 'align': pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5)) # feed pooled features to top model pooled_feat = self._head_to_tail(pooled_feat) # compute bbox offset bbox_pred = self.RCNN_bbox_pred(pooled_feat) if self.training and not self.class_agnostic: if need_backprop.numpy(): # select the corresponding columns according to roi labels bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4) bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4)) bbox_pred = bbox_pred_select.squeeze(1) # gathers the regression columns of the corresponding class via rois_label # compute object classification probability cls_score = self.RCNN_cls_score(pooled_feat) cls_prob = F.softmax(cls_score, dim=1) RCNN_loss_cls = 0 RCNN_loss_bbox = 0 if self.training: if need_backprop.numpy(): # calculate classification and b.b.
regression loss only for source data # classification loss RCNN_loss_cls = F.cross_entropy(cls_score, rois_label) # bounding box regression L1 loss RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws) else: RCNN_loss_cls = Variable(torch.zeros(1).float().cuda()) RCNN_loss_bbox = Variable(torch.zeros(1).float().cuda()) rpn_loss_cls = Variable(torch.zeros(1).float().cuda()) rpn_loss_bbox = Variable(torch.zeros(1).float().cuda()) cls_prob = cls_prob.view(batch_size, rois.size(1), -1) bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, DA_loss_img
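GradReverse.apply above is the standard gradient reversal layer from DANN-style adversarial training: identity on the forward pass, sign-flipped gradient on the backward pass, so the base network learns domain-confusing features while self.Dis learns to separate the domains. A minimal implementation consistent with how it is called here (the lambd scale is a common addition, assumed rather than confirmed):

import torch

class GradReverse(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, lambd=1.0):
        # Identity forward; remember the scale for backward.
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Flip (and scale) the gradient flowing into the base network.
        return grad_output.neg() * ctx.lambd, None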
def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0): batch_size = im_data.size(0) im_info = im_info.data gt_boxes = gt_boxes.data num_boxes = num_boxes.data lossQ = -1 # feed image data to base model to obtain base feature map base_feat = self.RCNN_base(im_data) # feed base feature map tp RPN to obtain rois''' #print("target is ",target) rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn( base_feat, im_info, gt_boxes, num_boxes, target) #print("rois.shape:",rois.shape) # if it is training phrase, then use ground trubut bboxes for refining if self.training and not target: #print("source traning---------------------------") #print("batch_size:",batch_size) #print("gt_boxes.shape:",gt_boxes.shape) #print("num_boxes:",num_boxes.data) ''' print(self.training) print(~target) print("use ground trubut bboxes for refining")''' roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes) rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data rois_label = Variable(rois_label.view(-1).long()) rois_target = Variable(rois_target.view(-1, rois_target.size(2))) rois_inside_ws = Variable( rois_inside_ws.view(-1, rois_inside_ws.size(2))) rois_outside_ws = Variable( rois_outside_ws.view(-1, rois_outside_ws.size(2))) else: rois_label = None rois_target = None rois_inside_ws = None rois_outside_ws = None rpn_loss_cls = 0 rpn_loss_bbox = 0 lossQ = -1 rois = Variable(rois) # do roi pooling based on predicted rois if cfg.POOLING_MODE == 'crop': # pdb.set_trace() # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5)) grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size) grid_yx = torch.stack( [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous() pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat = F.max_pool2d(pooled_feat, 2, 2) elif cfg.POOLING_MODE == 'align': pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5)) # feed pooled features to top model #print("pooled_feat before _head_to_tail:",pooled_feat.shape) if self.context: d_instance, _ = self.netD_pixel( grad_reverse(pooled_feat, lambd=eta)) #if target: #d_instance, _ = self.netD_pixel(grad_reverse(pooled_feat, lambd=eta)) #return d_pixel#, diff d_score_total, feat = self.netD_pixel(pooled_feat.detach()) else: d_score_total = self.netD_pixel(pooled_feat.detach()) d_instance = self.netD_pixel(grad_reverse(pooled_feat, lambd=eta)) #if target: #return d_pixel#,diff #d_score_total, _ = self.netD_pixel(pooled_feat.detach()) #print("d_score_total.shape",d_score_total.shape) #print("pooled_feat.shape:",pooled_feat.shape) d_instance_q = d_instance.split(128, 0) d_score_total_q = d_score_total.split(128, 0) d_score_total_qs = [] for img in range(batch_size): temp = torch.mean(d_score_total_q[img], dim=3) d_score_total_qs.append(torch.mean(temp, dim=2)) #d_score_total = torch.mean(d_score_total,dim=3) #d_score_total = torch.mean(d_score_total,dim=2) pooled_feat = self._head_to_tail(pooled_feat) #print("pooled_feat.shape:",pooled_feat.shape) if self.training and self.S_agent: pooled_feat_s = pooled_feat.split(128, 0) for img in range(batch_size): pooled_feat_d = pooled_feat_s[img] #print("------------------begain selecting in the source-----------------------") select_iter = int(pooled_feat_d.shape[0] / self.candidate_num) total_index = list(range(0, pooled_feat_d.shape[0])) 
np.random.shuffle(total_index) select_index = [] for eposide in range(select_iter): #print("#################################begain batch-%d-th the %d-th eposide##################################" % (img,eposide)) select_list = list(range(0, self.candidate_num)) batch_idx = total_index[eposide * self.candidate_num:(eposide + 1) * self.candidate_num] state = pooled_feat_d[batch_idx] #print("state.shape:",state.shape) d_score = d_score_total_qs[img][batch_idx] #print("d_score.shape:",d_score.shape) for it in range(self.select_num): #print("#########begain the %d-th selection################" % (it)) epsilon = self.epsilon_by_epoch(self.iter_dqn) action_index = self.current_model.act( state, epsilon, select_list) #print("action_index:",action_index) #action_episode.append(action_index) try: select_list.remove(action_index) except: print("select_list:", select_list) print("action_index:", action_index) print( "error!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") continue #print("the %d-th select, action_index is %d"%(it,action_index)) if d_score[action_index] > self.ts: reward = -1 else: reward = 1 #print("reward:",reward) next_state = torch.tensor(state) next_state[action_index] = torch.zeros( 1, next_state.shape[1]) if it == (self.select_num - 1): done = 1 else: done = 0 self.replay_buffer.push(state, action_index, reward, next_state, done, select_list) self.iter_dqn = self.iter_dqn + 1 state = next_state select_index = select_index + [ batch_idx[i] for i in select_list ] if len(self.replay_buffer) > cfg.BATCH_SIZE_DQN: lossQ = DQN.compute_td_loss(self.current_model, self.target_model, self.replay_buffer, cfg.BATCH_SIZE_DQN) if np.mod(self.iter_dqn, cfg.replace_target_iter) == 0: DQN.update_target(self.current_model, self.target_model) if img == 0: d_instance_refine = d_instance_q[img][select_index] else: d_instance_refine = torch.cat( (d_instance_refine, d_instance_q[img][select_index]), 0) pooled_feat_original = torch.tensor(pooled_feat) if self.context: feat = feat.view(feat.size(0), -1) pooled_feat = torch.cat((feat, pooled_feat), 1) # compute bbox offset bbox_pred = self.RCNN_bbox_pred(pooled_feat) if self.training and not self.class_agnostic and not target: bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4) bbox_pred_select = torch.gather( bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4)) bbox_pred = bbox_pred_select.squeeze(1) # compute object classification probability #print("pooled_feat.shape in faster_rcnn_global_pixel_instance:",pooled_feat.shape) cls_score = self.RCNN_cls_score(pooled_feat) cls_prob = F.softmax(cls_score, 1) #print("cls_prob is ",cls_prob.shape) if self.training and target and self.T_agent: pooled_feat_t = pooled_feat_original.split(128, 0) for img in range(batch_size): pooled_feat_d = pooled_feat_t[img] select_iter_T = int(pooled_feat_d.shape[0] / self.candidate_num) #print("select_iter_T:",select_iter_T) total_index_T = list(range(0, pooled_feat_d.shape[0])) np.random.shuffle(total_index_T) #print("gt_label:",gt_label) #print("total_index:",len(total_index)) select_index_T = [] for eposide_T in range(select_iter_T): select_list_T = list(range(0, self.candidate_num)) batch_idx_T = total_index_T[eposide_T * self.candidate_num:(eposide_T + 1) * self.candidate_num] state_T = pooled_feat_d[batch_idx_T] d_score_T = d_score_total_qs[img][batch_idx_T] #print("label_pre:",label_pre) for it in range(self.select_num): epsilon_T = self.epsilon_by_epoch_T(self.iter_dqn_T) action_index_T = 
self.current_model_T.act( state_T, epsilon_T, select_list_T) #select_list_T.remove(action_index_T) try: select_list_T.remove(action_index_T) except: print("select_list_T:", select_list_T) print("action_index:", action_index_T) print( "error!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") continue #print("label_pre[action_index_T]:",label_pre[action_index_T]) #print("torch.eq(gt_label,label_pre[action_index_T]):",torch.eq(gt_label,label_pre[action_index_T])) if d_score_T[action_index_T] > self.tt: reward = 1 else: reward = -1 #print("D_score:",d_score_T[action_index_T][1],"reward:",reward) next_state_T = torch.tensor(state_T) next_state_T[action_index_T] = torch.zeros( 1, next_state_T.shape[1]) if it == (self.select_num - 1): done = 1 else: done = 0 self.replay_buffer_T.push(state_T, action_index_T, reward, next_state_T, done, select_list_T) self.iter_dqn_T = self.iter_dqn_T + 1 state_T = next_state_T #print("select_list_T:",select_list_T) #if len(self.replay_buffer_T)>cfg.BATCH_SIZE_DQN: # lossQ = DQN.compute_td_loss(self.current_model_T,self.target_model_T,self.replay_buffer_T,cfg.BATCH_SIZE_DQN) #if np.mod(self.iter_dqn_T,cfg.replace_target_iter)==0: # DQN.update_target(self.current_model_T,self.target_model_T) select_index_T = select_index_T + [ batch_idx_T[i] for i in select_list_T ] if len(self.replay_buffer_T) > cfg.BATCH_SIZE_DQN: lossQ = DQN.compute_td_loss(self.current_model_T, self.target_model_T, self.replay_buffer_T, cfg.BATCH_SIZE_DQN) if np.mod(self.iter_dqn_T, cfg.replace_target_iter) == 0: DQN.update_target(self.current_model_T, self.target_model_T) #d_instance = d_instance[select_index_T] if img == 0: d_instance_refine = d_instance_q[img][select_index_T] else: d_instance_refine = torch.cat( (d_instance_refine, d_instance_q[img][select_index_T]), 0) if target: return d_instance_refine, lossQ RCNN_loss_cls = 0 RCNN_loss_bbox = 0 if self.training: # classification loss RCNN_loss_cls = F.cross_entropy(cls_score, rois_label) # bounding box regression L1 loss RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws) cls_prob = cls_prob.view(batch_size, rois.size(1), -1) bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) if self.S_agent: return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_instance_refine, lossQ #,diff else: return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_instance, lossQ
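The selection loops above depend on self.replay_buffer.push(...) and DQN.compute_td_loss(...), both defined outside this function. A minimal FIFO experience buffer matching the push signature used here (the capacity default is an assumption):

import random
from collections import deque

class ReplayBuffer:
    def __init__(self, capacity=10000):
        # deque(maxlen=...) silently drops the oldest transitions.
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done, select_list):
        self.buffer.append((state, action, reward, next_state, done, select_list))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)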
def forward(self, im_data, im_info, gt_boxes, num_boxes, flag=True, rpn_bs=128): batch_size = im_data.size(0) im_info = im_info.data gt_boxes = gt_boxes.data num_boxes = num_boxes.data # conv1 feature map conv1_feat = self.Conv1(im_data) # feed image data to base model to obtain base feature map base_feat = self.RCNN_base(conv1_feat) # feed base feature map tp RPN to obtain rois rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn( base_feat, im_info, gt_boxes, num_boxes) # if it is training phrase, then use ground trubut bboxes for refining if self.training: roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes, rpn_bs) rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data rois_label = Variable(rois_label.view(-1).long()) rois_target = Variable(rois_target.view(-1, rois_target.size(2))) rois_inside_ws = Variable( rois_inside_ws.view(-1, rois_inside_ws.size(2))) rois_outside_ws = Variable( rois_outside_ws.view(-1, rois_outside_ws.size(2))) else: rois_label = None rois_target = None rois_inside_ws = None rois_outside_ws = None rpn_loss_cls = 0 rpn_loss_bbox = 0 rois = Variable(rois) # do roi pooling based on predicted rois if cfg.POOLING_MODE == 'crop': # pdb.set_trace() # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5)) grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size) grid_yx = torch.stack( [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous() pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat = F.max_pool2d(pooled_feat, 2, 2) elif cfg.POOLING_MODE == 'align': pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5)) ################################################ # define generator network ################################################ #import ipdb #ipdb.set_trace() if flag == False: # import ipdb # ipdb.set_trace() downsample_conv1_feat = self.downsample(conv1_feat) conv1_pooled_feat = self.RCNN_roi_pool_conv1( downsample_conv1_feat, rois.view(-1, 5)) residual_feat = self.residualblock(conv1_pooled_feat) pooled_feat = residual_feat + pooled_feat # feed pooled features to top model rcnn_pooled_feat = self._head_to_tail(pooled_feat) # compute bbox offset bbox_pred = self.RCNN_bbox_pred(rcnn_pooled_feat) if self.training and not self.class_agnostic: # select the corresponding columns according to roi labels bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4) bbox_pred_select = torch.gather( bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4)) bbox_pred = bbox_pred_select.squeeze(1) # compute object classification probability cls_score = self.RCNN_cls_score(rcnn_pooled_feat) cls_prob = F.softmax(cls_score, 1) RCNN_loss_cls = 0 RCNN_loss_bbox = 0 if self.training: # classification loss RCNN_loss_cls = F.cross_entropy(cls_score, rois_label) # bounding box regression L1 loss RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws) cls_prob = cls_prob.view(batch_size, rois.size(1), -1) bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) if self.training: rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0) rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0) RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0) RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0) rois_label = torch.unsqueeze(rois_label, 0) return rois, cls_prob, bbox_pred, 
rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
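When flag is False, the branch above re-injects low-level conv1 detail into the RoI features: the conv1 map is downsampled, RoI-pooled, run through self.residualblock, and added to pooled_feat. A sketch of that residual add (channel counts and the block layout are assumptions; the actual residualblock is not shown):

import torch.nn as nn

class LowLevelResidual(nn.Module):
    def __init__(self, in_ch=64, out_ch=1024):
        super().__init__()
        # Project pooled conv1 features up to the RoI feature width.
        self.proj = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, conv1_pooled, pooled_feat):
        # Element-wise residual add, as in pooled_feat = residual_feat + pooled_feat.
        return pooled_feat + self.proj(conv1_pooled)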
def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, test=False, eta=1.0, hints=False): if test: self.training = False batch_size = im_data.size(0) im_info = im_info.data gt_boxes = gt_boxes.data num_boxes = num_boxes.data # feed image data to base model to obtain base feature map base_feat1 = self.RCNN_base1(im_data) if self.dc == 'swda': if self.lc: d_pixel, _ = self.netD_pixel( grad_reverse(base_feat1, lambd=eta)) # print(d_pixel) if not target: _, feat_pixel = self.netD_pixel(base_feat1.detach()) else: d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta)) base_feat = self.RCNN_base2(base_feat1) if self.dc == 'vanilla': domain = self.netD_dc(grad_reverse(base_feat, lambd=eta)) if target: return None, domain elif self.dc == 'swda': if self.gc: domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta)) if target: return d_pixel, domain_p _, feat = self.netD(base_feat.detach()) else: domain_p = self.netD(grad_reverse(base_feat, lambd=eta)) if target: return d_pixel, domain_p # feed base feature map to the RPN to obtain rois rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn( base_feat, im_info, gt_boxes, num_boxes) # if it is the training phase, use ground-truth bboxes for refining if self.training: roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes) rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data rois_label = Variable(rois_label.view(-1).long()) rois_target = Variable(rois_target.view(-1, rois_target.size(2))) rois_inside_ws = Variable( rois_inside_ws.view(-1, rois_inside_ws.size(2))) rois_outside_ws = Variable( rois_outside_ws.view(-1, rois_outside_ws.size(2))) else: rois_label = None rois_target = None rois_inside_ws = None rois_outside_ws = None rpn_loss_cls = 0 rpn_loss_bbox = 0 rois = Variable(rois) # do roi pooling based on predicted rois if cfg.POOLING_MODE == 'crop': # pdb.set_trace() # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5)) grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size) grid_yx = torch.stack( [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous() pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat = F.max_pool2d(pooled_feat, 2, 2) elif cfg.POOLING_MODE == 'align': pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5)) # feed pooled features to top model pooled_feat = self._head_to_tail(pooled_feat) #feat_pixel = torch.zeros(feat_pixel.size()).cuda() if self.lc: feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1) pooled_feat = torch.cat((feat_pixel, pooled_feat), 1) if self.gc: feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1) pooled_feat = torch.cat((feat, pooled_feat), 1) # compute bbox offset bbox_pred = self.RCNN_bbox_pred(pooled_feat) if self.training and not self.class_agnostic: bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4) bbox_pred_select = torch.gather( bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4)) bbox_pred = bbox_pred_select.squeeze(1) # compute object classification probability cls_score = self.RCNN_cls_score(pooled_feat) cls_prob = F.softmax(cls_score, 1) if self.conf: # confidence confidence = torch.sigmoid(self.netD_confidence(pooled_feat)) # Make sure we don't have any numerical instability eps = 1e-12 pred_original = 
torch.clamp(cls_prob, 0. + eps, 1. - eps) confidence = torch.clamp(confidence, 0. + eps, 1. - eps) confidence_loss = (-torch.log(confidence)) RCNN_loss_cls = 0 RCNN_loss_bbox = 0 if self.training: # classification loss if self.conf and hints: # Randomly set half of the confidences to 1 (i.e. no hints) b = torch.bernoulli( torch.Tensor(confidence.size()).uniform_(0, 1)).cuda() conf = confidence * b + (1 - b) labels_onehot = encode_onehot(rois_label, pred_original.size(1)) pred_new = pred_original * conf.expand_as(pred_original) + \ labels_onehot * (1 - conf.expand_as(labels_onehot)) pred_new = torch.log(pred_new) RCNN_loss_cls = F.nll_loss(pred_new, rois_label) else: RCNN_loss_cls = F.cross_entropy(cls_score, rois_label) # bounding box regression L1 loss RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws) cls_prob = cls_prob.view(batch_size, rois.size(1), -1) bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) if test: self.training = True if self.dc == 'swda' and self.conf is None: return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, None, None elif self.dc == 'vanilla' and self.conf is None: return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, domain, None, None elif self.conf and self.dc is None: return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, None, confidence_loss, confidence elif self.conf and self.dc == "swda": return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, confidence_loss, confidence elif self.conf and self.dc == "vanilla": return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, domain, confidence_loss, confidence else: return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, None, None, None
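The confidence branch above follows the learning-confidence recipe: class probabilities are interpolated toward the one-hot label in proportion to (1 - confidence), and -log(confidence) is charged as the price of asking for hints. A self-contained sketch of that loss (lmbda is an assumed weighting; the Bernoulli half-hints masking used above is omitted):

import torch
import torch.nn.functional as F

def confidence_hint_loss(cls_prob, confidence, labels, lmbda=0.5, eps=1e-12):
    p = cls_prob.clamp(eps, 1.0 - eps)    # (N, C)
    c = confidence.clamp(eps, 1.0 - eps)  # (N, 1)
    onehot = F.one_hot(labels, p.size(1)).float()
    # Blend prediction and ground truth by the predicted confidence.
    p_new = p * c + onehot * (1.0 - c)
    nll = F.nll_loss(torch.log(p_new), labels)
    # Penalise low confidence so the model cannot always ask for hints.
    return nll + lmbda * (-torch.log(c)).mean()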
def forward( self, src_im_data, src_im_info, src_im_cls_lb, src_gt_boxes, src_num_boxes, src_need_backprop, _tgt_im_data, _tgt_im_info, _tgt_gt_boxes, _tgt_num_boxes, _tgt_need_backprop, ft_im_data, ft_im_info, ft_im_cls_lb, ft_gt_boxes, ft_num_boxes, ft_need_backprop, fs_im_data, fs_im_info, fs_gt_boxes, fs_num_boxes, fs_need_backprop, weight_value=1.0, ): #concate src_im_data and ft_im_data im_data = torch.cat([src_im_data, ft_im_data], dim=0) im_info = torch.cat([src_im_info, ft_im_info], dim=0) im_cls_lb = torch.cat([src_im_cls_lb, ft_im_cls_lb], dim=0) gt_boxes = torch.cat([src_gt_boxes, ft_gt_boxes], dim=0) num_boxes = torch.cat([src_num_boxes, ft_num_boxes], dim=0) need_backprop = torch.cat([src_need_backprop, ft_need_backprop], dim=0) batch_size = im_data.size(0) im_info = im_info.data im_cls_lb = im_cls_lb.data gt_boxes = gt_boxes.data num_boxes = num_boxes.data need_backprop = need_backprop.data base_feat = self.RCNN_base(im_data) cls_feat = self.conv_lst(self.avg_pool(base_feat)).squeeze(-1).squeeze(-1) img_cls_loss = nn.BCEWithLogitsLoss()(cls_feat, im_cls_lb) #for image-level contra loss contra_src_loss = self.compute_contra_loss(base_feat) #concate _tgt_data and fs_im_data tgt_im_data = torch.cat([_tgt_im_data, fs_im_data], dim=0) tgt_im_info = torch.cat([_tgt_im_info, fs_im_info], dim=0) tgt_gt_boxes = torch.cat([_tgt_gt_boxes, fs_gt_boxes], dim=0) tgt_num_boxes = torch.cat([_tgt_num_boxes, fs_num_boxes], dim=0) tgt_need_backprop = torch.cat([_tgt_need_backprop, fs_need_backprop], dim=0) tgt_batch_size = tgt_im_data.size(0) tgt_im_info = tgt_im_info.data tgt_gt_boxes = tgt_gt_boxes.data tgt_num_boxes = tgt_num_boxes.data tgt_need_backprop = tgt_need_backprop.data # feed base feature map tp RPN to obtain rois self.RCNN_rpn.train() rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn( base_feat, im_info, gt_boxes, num_boxes ) # if it is training phrase, then use ground trubut bboxes for refining if self.training: roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes) rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data rois_label = Variable(rois_label.view(-1).long()) rois_target = Variable(rois_target.view(-1, rois_target.size(2))) rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2))) rois_outside_ws = Variable( rois_outside_ws.view(-1, rois_outside_ws.size(2)) ) else: rois_label = None rois_target = None rois_inside_ws = None rois_outside_ws = None rpn_loss_cls = 0 rpn_loss_bbox = 0 rois = Variable(rois) # do roi pooling based on predicted rois if cfg.POOLING_MODE == "align": pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5)) elif cfg.POOLING_MODE == "pool": pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5)) # feed pooled features to top model pooled_feat = self._head_to_tail(pooled_feat) # compute bbox offset bbox_pred = self.RCNN_bbox_pred(pooled_feat) if self.training and not self.class_agnostic: # select the corresponding columns according to roi labels bbox_pred_view = bbox_pred.view( bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4 ) bbox_pred_select = torch.gather( bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand( rois_label.size(0), 1, 4 ), ) bbox_pred = bbox_pred_select.squeeze(1) # compute object classification probability cls_score = self.RCNN_cls_score(pooled_feat) cls_prob = F.softmax(cls_score, 1) RCNN_loss_cls = 0 RCNN_loss_bbox = 0 # ins_contra_loss = 0 if self.training: # classification loss RCNN_loss_cls = F.cross_entropy(cls_score, rois_label) # bounding box 
regression L1 loss RCNN_loss_bbox = _smooth_l1_loss( bbox_pred, rois_target, rois_inside_ws, rois_outside_ws ) #for probability invariance invar_num = 60 invar_index = np.random.choice(rois.size(1), size=invar_num) invar_rois = torch.zeros((rois.size(0) * invar_num, rois.size(2))).cuda() for i in range(batch_size): for j in range(invar_num): invar_rois[i*invar_num + j] = rois[i][invar_index[j]] invar_rois = torch.cat([invar_rois.unsqueeze(0), invar_rois.unsqueeze(0)], dim=0) if cfg.POOLING_MODE == "align": invar_pooled_feat = self.RCNN_roi_align(base_feat, invar_rois.view(-1, 5)) elif cfg.POOLING_MODE == "pool": invar_pooled_feat = self.RCNN_roi_pool(base_feat, invar_rois.view(-1, 5)) # feed pooled features to top model invar_pooled_feat = self._head_to_tail(invar_pooled_feat) # compute object classification probability invar_cls_score = self.RCNN_cls_score(invar_pooled_feat) invar_cls_prob = F.softmax(invar_cls_score, 1) invar_cls_prob = invar_cls_prob.view(batch_size, -1, invar_cls_prob.size(1)) s_invar_cls_prob = invar_cls_prob[:1].squeeze(0) ft_invar_cls_prob = invar_cls_prob[1:].squeeze(0) invar_kdl_loss = self.KLDistance(s_invar_cls_prob, ft_invar_cls_prob) cls_prob = cls_prob.view(batch_size, rois.size(1), -1) bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) """ =================== for target ==========================""" tgt_batch_size = tgt_im_data.size(0) tgt_im_info = ( tgt_im_info.data ) # (size1,size2, image ratio(new image / source image) ) tgt_gt_boxes = tgt_gt_boxes.data tgt_num_boxes = tgt_num_boxes.data tgt_need_backprop = tgt_need_backprop.data # feed image data to base model to obtain base feature map tgt_base_feat = self.RCNN_base(tgt_im_data) contra_tgt_loss = self.compute_contra_loss(tgt_base_feat) tgt_img_cls_feat = self.conv_lst(tgt_base_feat) tgt_img_cls_feat = F.softmax(tgt_img_cls_feat, dim=1) tgt_img_cls_loss = self.entropy_loss(tgt_img_cls_feat) # add new code tgt_image_cls_feat = ( self.conv_lst(self.avg_pool(tgt_base_feat)).squeeze(-1).squeeze(-1).detach() ) # tgt_image_cls_feat = F.sigmoid(tgt_image_cls_feat[0]).detach() tgt_image_cls_feat = F.sigmoid(tgt_image_cls_feat).detach() # feed base feature map tp RPN to obtain rois self.RCNN_rpn.eval() tgt_rois, tgt_rpn_loss_cls, tgt_rpn_loss_bbox = self.RCNN_rpn( tgt_base_feat, tgt_im_info, tgt_gt_boxes, tgt_num_boxes ) # if it is training phrase, then use ground trubut bboxes for refining tgt_rois_label = None tgt_rois_target = None tgt_rois_inside_ws = None tgt_rois_outside_ws = None tgt_rpn_loss_cls = 0 tgt_rpn_loss_bbox = 0 tgt_rois = Variable(tgt_rois) # do roi pooling based on predicted rois if cfg.POOLING_MODE == "crop": # pdb.set_trace() # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5)) tgt_grid_xy = _affine_grid_gen( tgt_rois.view(-1, 5), tgt_base_feat.size()[2:], self.grid_size ) tgt_grid_yx = torch.stack( [tgt_grid_xy.data[:, :, :, 1], tgt_grid_xy.data[:, :, :, 0]], 3 ).contiguous() tgt_pooled_feat = self.RCNN_roi_crop( tgt_base_feat, Variable(tgt_grid_yx).detach() ) if cfg.CROP_RESIZE_WITH_MAX_POOL: tgt_pooled_feat = F.max_pool2d(tgt_pooled_feat, 2, 2) elif cfg.POOLING_MODE == "align": tgt_pooled_feat = self.RCNN_roi_align(tgt_base_feat, tgt_rois.view(-1, 5)) elif cfg.POOLING_MODE == "pool": tgt_pooled_feat = self.RCNN_roi_pool(tgt_base_feat, tgt_rois.view(-1, 5)) # feed pooled features to top model tgt_pooled_feat = self._head_to_tail(tgt_pooled_feat) # add new code tgt_cls_score = self.RCNN_cls_score(tgt_pooled_feat).detach() tgt_prob = F.softmax(tgt_cls_score, 1).detach() 
tgt_pre_label = tgt_prob.argmax(1).detach() #for probability invariance for target domain, zf tgt_invar_num = 60 tgt_invar_index = np.random.choice(tgt_rois.size(1), size=tgt_invar_num) tgt_invar_rois = torch.zeros((tgt_rois.size(0) * tgt_invar_num, tgt_rois.size(2))).cuda() for i in range(tgt_batch_size): for j in range(tgt_invar_num): tgt_invar_rois[i*tgt_invar_num + j] = tgt_rois[i][tgt_invar_index[j]] tgt_invar_rois = torch.cat([tgt_invar_rois.unsqueeze(0), tgt_invar_rois.unsqueeze(0)], dim=0) # do roi pooling based on predicted rois if cfg.POOLING_MODE == "align": tgt_invar_pooled_feat = self.RCNN_roi_align(tgt_base_feat, tgt_invar_rois.view(-1, 5)) elif cfg.POOLING_MODE == "pool": tgt_invar_pooled_feat = self.RCNN_roi_pool(tgt_base_feat, tgt_invar_rois.view(-1, 5)) # feed pooled features to top model tgt_invar_pooled_feat = self._head_to_tail(tgt_invar_pooled_feat) # compute object classification probability tgt_invar_cls_score = self.RCNN_cls_score(tgt_invar_pooled_feat) tgt_invar_cls_prob = F.softmax(tgt_invar_cls_score, 1) tgt_invar_cls_prob = tgt_invar_cls_prob.view(tgt_batch_size, -1, tgt_invar_cls_prob.size(1)) t_invar_cls_prob = tgt_invar_cls_prob[:1].squeeze(0) fs_invar_cls_prob = tgt_invar_cls_prob[1:].squeeze(0) tgt_invar_kdl_loss = self.KLDistance(t_invar_cls_prob, fs_invar_cls_prob) """ DA loss """ # DA LOSS DA_img_loss_cls = 0 DA_ins_loss_cls = 0 tgt_DA_img_loss_cls = 0 tgt_DA_ins_loss_cls = 0 base_score, local_base_score, base_label = self.RCNN_imageDA(base_feat, need_backprop) # Image DA base_prob = F.log_softmax(base_score, dim=1) DA_img_loss_cls = F.nll_loss(base_prob, base_label) #Image DA for local local_DA_img_loss_cls = 0 for i in range(self.n_classes-1): local_base_prob_i = F.log_softmax(local_base_score[i], dim=1) local_DA_img_loss_cls_i = F.nll_loss(local_base_prob_i, base_label) local_DA_img_loss_cls += local_DA_img_loss_cls_i local_DA_img_loss_cls = local_DA_img_loss_cls / (self.n_classes-1) instance_sigmoid, same_size_label = self.RCNN_instanceDA( pooled_feat, need_backprop ) instance_loss = nn.BCELoss() DA_ins_loss_cls = instance_loss(instance_sigmoid, same_size_label) # # consistency_prob = torch.max(F.softmax(base_score, dim=1),dim=1)[0] # consistency_prob = F.softmax(base_score, dim=1)[:, 1, :, :] # consistency_prob = torch.mean(consistency_prob) # consistency_prob = consistency_prob.repeat(instance_sigmoid.size()) # DA_cst_loss = self.consistency_loss(instance_sigmoid, consistency_prob.detach()) #new consistency prob, zf DA_cst_loss = 0 consistency_prob = F.softmax(base_score, dim=1)[:, 1, :, :] da_instance_sigmoid = instance_sigmoid.view(batch_size, -1,1) for i in range(batch_size): consistency_prob_i = torch.mean(consistency_prob[i]) da_instance_sigmoid_i = da_instance_sigmoid[i] consistency_prob_i = consistency_prob_i.repeat(da_instance_sigmoid_i.size()) DA_cst_loss_i = self.consistency_loss(da_instance_sigmoid_i, consistency_prob_i.detach()) DA_cst_loss += DA_cst_loss_i DA_cst_loss = DA_cst_loss / batch_size """ ************** taget loss **************** """ tgt_base_score, tgt_local_base_score, tgt_base_label = self.RCNN_imageDA( tgt_base_feat, tgt_need_backprop ) # Image DA tgt_base_prob = F.log_softmax(tgt_base_score, dim=1) tgt_DA_img_loss_cls = F.nll_loss(tgt_base_prob, tgt_base_label) tgt_instance_sigmoid, tgt_same_size_label = self.RCNN_instanceDA( tgt_pooled_feat, tgt_need_backprop ) #Image DA for local tgt_local_DA_img_loss_cls = 0 for i in range(self.n_classes-1): tgt_local_base_prob_i = F.log_softmax(tgt_local_base_score[i], dim=1) 
tgt_local_DA_img_loss_cls_i = F.nll_loss(tgt_local_base_prob_i, tgt_base_label) tgt_local_DA_img_loss_cls += tgt_local_DA_img_loss_cls_i tgt_local_DA_img_loss_cls = tgt_local_DA_img_loss_cls/(self.n_classes-1) # add new code target_weight = [] tgt_rois_num_each = int(len(tgt_pre_label)/tgt_batch_size) tgt_image_cls_feat_index = -1 for i in range(len(tgt_pre_label)): #zf if i % tgt_rois_num_each == 0: tgt_image_cls_feat_index +=1 label_i = tgt_pre_label[i].item() if label_i > 0: diff_value = torch.exp( weight_value * torch.abs(tgt_image_cls_feat[tgt_image_cls_feat_index][label_i - 1] - tgt_prob[i][label_i]) ).item() target_weight.append(diff_value) else: target_weight.append(1.0) tgt_instance_loss = nn.BCELoss( weight=torch.Tensor(target_weight).view(-1, 1).cuda() ) tgt_DA_ins_loss_cls = tgt_instance_loss( tgt_instance_sigmoid, tgt_same_size_label ) # tgt_consistency_prob = F.softmax(tgt_base_score, dim=1)[:, 0, :, :] # tgt_consistency_prob = torch.mean(tgt_consistency_prob) # tgt_consistency_prob = tgt_consistency_prob.repeat(tgt_instance_sigmoid.size()) # tgt_DA_cst_loss = self.consistency_loss( # tgt_instance_sigmoid, tgt_consistency_prob.detach() # ) #consistency_prob for batch, zf tgt_DA_cst_loss = 0 tgt_consistency_prob = F.softmax(tgt_base_score, dim=1)[:, 0, :, :] tgt_da_instance_sigmoid = tgt_instance_sigmoid.view(tgt_batch_size, -1,1) for i in range(tgt_batch_size): tgt_consistency_prob_i = torch.mean(tgt_consistency_prob[i]) tgt_da_instance_sigmoid_i = tgt_da_instance_sigmoid[i] tgt_consistency_prob_i = tgt_consistency_prob_i.repeat(tgt_da_instance_sigmoid_i.size()) tgt_DA_cst_loss_i = self.consistency_loss(tgt_da_instance_sigmoid_i, tgt_consistency_prob_i.detach()) tgt_DA_cst_loss += tgt_DA_cst_loss_i tgt_DA_cst_loss = tgt_DA_cst_loss / tgt_batch_size return ( rois, cls_prob, bbox_pred, img_cls_loss, tgt_img_cls_loss, contra_src_loss, contra_tgt_loss, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, # tgt_RCNN_loss_cls, RCNN_loss_bbox, rois_label, invar_kdl_loss, tgt_invar_kdl_loss, DA_img_loss_cls, local_DA_img_loss_cls, DA_ins_loss_cls, tgt_DA_img_loss_cls, tgt_local_DA_img_loss_cls, tgt_DA_ins_loss_cls, DA_cst_loss, tgt_DA_cst_loss, )
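self.KLDistance above pulls the two stacked copies of each ROI batch toward identical class posteriors; its exact definition is not shown in this file, but a symmetric KL divergence between two (N, C) probability tensors is one plausible form:

import torch

def kl_distance(p, q, eps=1e-8):
    p = p.clamp(min=eps)
    q = q.clamp(min=eps)
    kl_pq = (p * (p / q).log()).sum(dim=1)
    kl_qp = (q * (q / p).log()).sum(dim=1)
    # Symmetrised and averaged over ROIs.
    return 0.5 * (kl_pq + kl_qp).mean()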
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws, gt_rois = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, dim=1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss with hard-negative mining: keep only the
        # hardest 30% of background ROIs plus all foreground ROIs
        # RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        loss_cls = torch.Tensor(rois_label.size(0)).cuda()
        for i in range(rois_label.shape[0]):
            loss_cls[i] = F.cross_entropy(cls_score[i].view(1, 2), rois_label[i].view(1))
        RCNN_loss_cls = torch.mean(
            torch.cat((loss_cls[rois_label == 0].sort(0, True)[0][:int(loss_cls[rois_label == 0].size(0) * 3 / 10)],
                       loss_cls[rois_label == 1]), 0))

        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        # repulsion loss (RepGT / RepBox terms)
        RCNN_loss_repgt, RCNN_loss_repbox = repulsion(rois, bbox_pred, gt_rois, rois_inside_ws, rois_outside_ws)
        # RCNN_loss_repgt = torch.zeros(1).cuda()
        # RCNN_loss_repbox = torch.zeros(1).cuda()

        # make the scalar losses 1-D (e.g., for gathering under nn.DataParallel)
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)
        RCNN_loss_repgt = torch.unsqueeze(RCNN_loss_repgt, 0)
        RCNN_loss_repbox = torch.unsqueeze(RCNN_loss_repbox, 0)
    else:
        RCNN_loss_repgt = torch.zeros(1).cuda()
        RCNN_loss_repbox = torch.zeros(1).cuda()

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, RCNN_loss_repgt, RCNN_loss_repbox, rois_label
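The per-ROI loop above evaluates cross-entropy one sample at a time before keeping the hardest 30% of background ROIs. The same mining can be written loop-free with reduction='none'; the sketch below assumes cls_score is (N, 2) and rois_label is (N,), and the helper name ohem_cls_loss is hypothetical.

# Hedged sketch: loop-free equivalent of the hard-negative mining above.
# The function name and keep_frac parameter are illustrative, not from the
# original repo; the selection logic mirrors the loop version exactly.
import torch
import torch.nn.functional as F

def ohem_cls_loss(cls_score, rois_label, keep_frac=0.3):
    # cls_score: (N, 2) logits; rois_label: (N,) with 0 = background, 1 = foreground
    loss_cls = F.cross_entropy(cls_score, rois_label, reduction='none')
    neg_loss = loss_cls[rois_label == 0]
    pos_loss = loss_cls[rois_label == 1]
    num_keep = int(neg_loss.size(0) * keep_frac)  # hardest 30% of negatives
    hard_neg = neg_loss.sort(0, descending=True)[0][:num_keep]
    return torch.cat((hard_neg, pos_loss), 0).mean()

Besides removing the Python loop, this avoids the uninitialized torch.Tensor(N) buffer and keeps the whole computation on the autograd graph in one call.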
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data
    self.batch_size = im_data.size(0)

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # Base feature
    base_feat = self.RCNN_conv_new(base_feat)

    # Local feature with PS-ROIPooling
    # Local classification
    local_cls_feat = self.RCNN_local_cls_base(base_feat)
    local_cls_feat = self.RCNN_psroi_pool_cls(local_cls_feat, rois.view(-1, 5))
    local_cls = self.avg_pooling(local_cls_feat)
    local_cls = self.RCNN_local_cls_fc(local_cls)

    # Local bbox regression
    local_bbox_feat = self.RCNN_local_bbox_base(base_feat)
    local_bbox_feat = self.RCNN_psroi_pool_loc(local_bbox_feat, rois.view(-1, 5))
    local_bbox = self.avg_pooling(local_bbox_feat)

    # Global feature with ROIPooling
    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    global_base = self.RCNN_global_base(pooled_feat)
    global_cls = self.RCNN_global_cls(global_base)
    global_bbox = self.RCNN_global_bbox(global_base)

    # fuse global and local branches by summing their scores
    cls_score = (local_cls + global_cls).squeeze()
    bbox_pred = (local_bbox + global_bbox).squeeze()

    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    cls_prob = F.softmax(cls_score, dim=1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        loss_func = self.ohem_detect_loss if cfg.TRAIN.OHEM else self.detect_loss
        RCNN_loss_cls, RCNN_loss_bbox = loss_func(cls_score, rois_label, bbox_pred,
                                                  rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
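cfg.TRAIN.OHEM toggles between self.ohem_detect_loss and self.detect_loss, neither of which is shown in this snippet. A common OHEM head loss ranks ROIs by their combined classification and regression loss and keeps only the hardest ones; the sketch below is an assumed implementation, where the 128-ROI budget, the inline smooth-L1 (sigma = 1), and the standalone-function form are all assumptions rather than the author's code.

# Hedged sketch of an OHEM detect loss: keep only the hardest ROIs ranked by
# combined loss. The budget (hard_num=128) and the inline smooth-L1 are
# assumptions; only the method name appears in the forward pass above.
import torch
import torch.nn.functional as F

def ohem_detect_loss(cls_score, rois_label, bbox_pred, rois_target,
                     rois_inside_ws, rois_outside_ws, hard_num=128):
    # per-ROI classification loss
    loss_c = F.cross_entropy(cls_score, rois_label, reduction='none')
    # per-ROI smooth-L1 regression loss (sigma = 1)
    diff = rois_inside_ws * (bbox_pred - rois_target)
    smooth_l1 = torch.where(diff.abs() < 1.0, 0.5 * diff ** 2, diff.abs() - 0.5)
    loss_b = (rois_outside_ws * smooth_l1).sum(1)
    # rank ROIs by combined loss and keep only the hardest ones
    keep = torch.topk(loss_c + loss_b, min(hard_num, loss_c.size(0)))[1]
    return loss_c[keep].mean(), loss_b[keep].mean()

Ranking by the combined loss means a ROI can be selected either because its class is confused or because its box is badly regressed, which is the usual motivation for OHEM over plain random sampling of ROIs.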