def forward(self, base_feat, im_info, gt_boxes, num_boxes):
    """Run the RPN head: score anchors, predict box deltas, generate ROIs,
    and (in training mode) compute the RPN classification/regression losses.

    Args:
        base_feat: backbone feature map, shape (batch, C, H, W).
        im_info: per-image size/scale info passed through to the proposal
            and anchor-target layers.
        gt_boxes: ground-truth boxes; must be non-None when self.training.
        num_boxes: number of valid ground-truth boxes per image.

    Returns:
        (rois, rpn_loss_cls, rpn_loss_box) — losses are 0 in eval mode.
    """
    batch_size = base_feat.size(0)

    # Objectness scores: collapse anchor channels to 2 (bg/fg) for the
    # softmax, then restore the original channel layout.
    rpn_cls_score = self.RPN_cls_score(base_feat)
    rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
    rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
    rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

    # Per-anchor bbox regression offsets.
    rpn_bbox_pred = self.RPN_bbox_pred(base_feat)

    # Proposal layer turns scores + offsets into ROIs.
    cfg_key = 'TRAIN' if self.training else 'TEST'
    rois = self.RPN_proposal(
        (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))

    self.rpn_loss_cls = 0
    self.rpn_loss_box = 0

    # Generate anchor training targets and build the RPN losses.
    if self.training:
        assert gt_boxes is not None

        rpn_data = self.RPN_anchor_target(
            (rpn_cls_score.data, gt_boxes, im_info, num_boxes))

        # Classification loss: keep only anchors whose label != -1
        # (-1 marks "don't care" anchors).
        rpn_cls_score = rpn_cls_score_reshape.permute(
            0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
        rpn_label = rpn_data[0].view(batch_size, -1)

        rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
        rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0,
                                           rpn_keep)
        rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
        rpn_label = Variable(rpn_label.long())

        # Hard-negative mining: rank candidates by background log-loss
        # (column 0) and keep a subset before the cross-entropy.
        loss = -F.log_softmax(rpn_cls_score, dim=1)[:, 0]
        mask, num_pos = hard_negative_mining(loss, rpn_label)
        confidence = rpn_cls_score[mask, :]
        self.rpn_loss_cls = F.cross_entropy(confidence.reshape(-1, 2),
                                            rpn_label[mask],
                                            reduction='mean')

        rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = \
            rpn_data[1:]

        # Bbox regression loss: smooth L1 masked/weighted by the
        # inside/outside weights from the anchor-target layer.
        rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
        rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
        rpn_bbox_targets = Variable(rpn_bbox_targets)

        self.rpn_loss_box = _smooth_l1_loss(
            rpn_bbox_pred,
            rpn_bbox_targets,
            rpn_bbox_inside_weights,
            rpn_bbox_outside_weights,
            sigma=3,
            dim=[1, 2, 3],
        )

    return rois, self.rpn_loss_cls, self.rpn_loss_box
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    """Full detector forward pass: backbone -> RPN -> ROI pooling -> head.

    Records per-stage wall-clock timings on self (rpn_time, pre_roi_time,
    roi_pooling_time, subnet_time).

    Args:
        im_data: input image batch tensor.
        im_info: per-image size/scale info.
        gt_boxes: ground-truth boxes (used only when self.training).
        num_boxes: number of valid ground-truth boxes per image.

    Returns:
        (time_measure, rois, cls_prob, bbox_pred, rpn_loss_cls,
         rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label)

    Raises:
        ValueError: if cfg.POOLING_MODE is not 'align' or 'pool'.
    """
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # Backbone feature extraction.
    start = time.time()
    base_feat_backbone = self.RCNN_base(im_data)

    # RPN: proposals plus (in training) the RPN losses.
    rpn_feat = self.rpn(base_feat_backbone)
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        rpn_feat, im_info, gt_boxes, num_boxes)
    rpn_time = time.time()
    self.rpn_time = rpn_time - start

    # In the training phase, sample ROIs against ground-truth boxes to get
    # per-ROI labels and regression targets for the detection head.
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(
            rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(
            rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)
    pre_roi_time = time.time()
    self.pre_roi_time = pre_roi_time - rpn_time

    # Fuse backbone and RPN features before ROI feature extraction.
    base_feat = self.sam([base_feat_backbone, rpn_feat])

    # ROI pooling / alignment on the predicted ROIs.
    if cfg.POOLING_MODE == 'align':
        pooled_feat = self._roi_align_layer(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self._roi_pool_layer(base_feat, rois.view(-1, 5))
    else:
        # Fail fast with a clear message instead of a NameError below.
        raise ValueError(
            'Unsupported cfg.POOLING_MODE: {}'.format(cfg.POOLING_MODE))
    roi_pool_time = time.time()
    self.roi_pooling_time = roi_pool_time - pre_roi_time

    # Head network on the pooled ROI features.
    pooled_feat = self._head_to_tail(pooled_feat)

    # Per-ROI bbox regression offsets.
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # Class-specific regression: pick the 4 deltas belonging to each
        # ROI's ground-truth class.
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                        int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1,
                            1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # Per-ROI classification probabilities.
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # Classification loss with hard-negative mining on the background
        # log-loss (column 0).
        loss = -F.log_softmax(cls_score, dim=1)[:, 0]
        mask, num_pos = hard_negative_mining(loss, rois_label)
        confidence = cls_score[mask, :]
        RCNN_loss_cls = F.cross_entropy(confidence, rois_label[mask],
                                        reduction='mean')

        # Bounding-box regression L1 loss, scaled by 2
        # "to balance multi-task training".
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                         rois_inside_ws, rois_outside_ws)
        RCNN_loss_bbox = RCNN_loss_bbox * 2

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    subnet_time = time.time()
    self.subnet_time = subnet_time - roi_pool_time
    time_measure = [
        self.rpn_time, self.pre_roi_time, self.roi_pooling_time,
        self.subnet_time
    ]

    return (time_measure, rois, cls_prob, bbox_pred, rpn_loss_cls,
            rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label)