def forward(self, im_data, im_info, gt_boxes):
    """Run the multi-scale detector on a batch.

    The outputs of the three backbone stages are fused into a single feature
    map: stage 1 goes through ``self.downSample``, stage 3 is upsampled by a
    factor of two, and both are concatenated with stage 2 along dimension 1
    before ``self.downBeat`` is applied.

    Args:
        im_data: Input image batch; dimension 0 is the batch size.
        im_info: Per-image metadata, forwarded to the RPN.
        gt_boxes: Ground-truth boxes, forwarded to the RPN and (in training)
            to the proposal-target layer.

    Returns:
        Eval mode: ``(rois, cls_prob, bbox_pred)``.
        Training mode: ``(rois, cls_prob, bbox_pred, rpn_loss_cls,
        rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label)``.

    Raises:
        ValueError: If ``cfg.POOLING_MODE`` is neither ``'align'`` nor
            ``'pool'``.
    """
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data

    # feed image data to the base model to obtain the per-stage feature maps
    base_feat1 = self.RCNN_base1(im_data)  # 1/8
    base_feat2 = self.RCNN_base2(base_feat1)  # 1/16
    base_feat3 = self.RCNN_base3(base_feat2)  # 1/32

    # torch.cat on dim 1 requires all other dimensions to match, so stage 1
    # and stage 3 are resized before being stacked with stage 2.
    down_sampled = self.downSample(base_feat1)
    up_sampled = F.interpolate(base_feat3, scale_factor=2, mode='nearest')
    base_feat = torch.cat((down_sampled, base_feat2, up_sampled), 1)
    base_feat = self.downBeat(base_feat)

    # feed the fused feature map to the RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes)

    # In the training phase, use the ground-truth boxes to build the
    # per-roi labels and regression targets. Without this step the
    # training-only names used below are never assigned and the first
    # training call fails with NameError.
    # NOTE(review): assumes this module defines `RCNN_proposal_target` with
    # the same 5-tuple return used by the other detector variants -- confirm.
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(
            rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(
            rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on the predicted rois
    if cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
    else:
        # Fail here with a clear message rather than with an unbound
        # `pooled_feat` a few lines further down.
        raise ValueError(
            "unsupported cfg.POOLING_MODE: {!r}".format(cfg.POOLING_MODE))

    # feed pooled features to the top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the 4 columns that belong to each roi's label
        bbox_pred_view = bbox_pred.view(
            bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(
                rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(
            bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    if self.training:
        return (rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
                RCNN_loss_cls, RCNN_loss_bbox, rois_label)
    return rois, cls_prob, bbox_pred
def forward(self, im_data, im_info, gt_boxes):
    """Run the detector on a batch.

    Args:
        im_data: Input image batch; dimension 0 is the batch size.
        im_info: Per-image metadata, forwarded to the RPN.
        gt_boxes: Ground-truth boxes, forwarded to the RPN and (in training)
            to the proposal-target layer.

    Returns:
        Eval mode: ``(rois, cls_prob, bbox_pred)``.
        Training mode: ``(rois, cls_prob, bbox_pred, rpn_loss_cls,
        rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label)``.

    Raises:
        ValueError: If ``self.cfg.POOLING_MODE`` is neither ``'align'`` nor
            ``'pool'``.
    """
    cfg = self.cfg
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data

    # feed image data to the base model to obtain the base feature map
    base_feat = self.RCNN_base(im_data)

    # feed the base feature map to the RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        base_feat, im_info, gt_boxes)

    # in the training phase, use the ground-truth boxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(
            rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(
            rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on the predicted rois
    if cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
    else:
        # Fail here with a clear message rather than with an unbound
        # `pooled_feat` on the next statement.
        raise ValueError(
            "unsupported cfg.POOLING_MODE: {!r}".format(cfg.POOLING_MODE))

    # feed pooled features to the top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # compute bbox offset
    # NOTE(review): no per-label column selection is applied, so `bbox_pred`
    # reaches the loss exactly as the head produced it -- confirm the head
    # emits the same number of columns as `rois_target`.
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(
            bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    if self.training:
        return (rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
                RCNN_loss_cls, RCNN_loss_bbox, rois_label)
    return rois, cls_prob, bbox_pred
def forward(self, im_data, im_info, gt_boxes):
    """Run the detector on a batch, with optional OHEM and IoU-based losses.

    Training behaviour is driven by ``self.cfg.TRAIN``:

    * ``is_ohem_rcnn``: when true, the classification loss is averaged over
      the hardest rois only (the ``0.125 * BATCH_SIZE * batch`` largest
      per-roi losses) instead of over all rois.
    * ``loss_type``: ``"smoothL1loss"`` uses ``_smooth_l1_loss``;
      ``"IOUloss"`` uses ``-log(iou)`` and ``"GIOUloss"`` uses ``1 - g_iou``,
      both taken from ``compute_iou``. Any other value leaves the box loss
      at 0.

    Args:
        im_data: Input image batch; dimension 0 is the batch size.
        im_info: Per-image metadata, forwarded to the RPN.
        gt_boxes: Ground-truth boxes, forwarded to the RPN and (in training)
            to the proposal-target layer.

    Returns:
        Eval mode: ``(rois, cls_prob, bbox_pred)``.
        Training mode: ``(rois, cls_prob, bbox_pred, rpn_loss_cls,
        rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox)``.
    """
    cfg = self.cfg
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data

    # feed image data to the base model to obtain the base feature map
    base_feat = self.RCNN_base(im_data)

    # feed the base feature map to the RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        base_feat, im_info, gt_boxes)

    # in the training phase, use the ground-truth boxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(
            rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(
            rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)

    # do roi pooling based on the predicted rois
    pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to the top model
    pooled_feat = self._head_to_tail(pooled_feat)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # classification loss
        if cfg.TRAIN.is_ohem_rcnn:
            per_roi_loss = F.cross_entropy(
                cls_score, rois_label, reduction='none')
            top_k = int(0.125 * cfg.TRAIN.BATCH_SIZE * base_feat.size(0))
            # topk() rejects k larger than the number of elements, and k == 0
            # would make the mean below NaN; keep k inside [1, numel].
            top_k = min(max(top_k, 1), per_roi_loss.numel())
            _, hard_inds = per_roi_loss.topk(top_k)
            RCNN_loss_cls = per_roi_loss[hard_inds].mean()
        else:
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

        # bounding box regression loss
        if cfg.TRAIN.loss_type == "smoothL1loss":
            # The box loss is computed over all rois whether or not OHEM is
            # enabled for the classification loss.
            RCNN_loss_bbox = _smooth_l1_loss(
                bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
        elif "IOUloss" in cfg.TRAIN.loss_type:
            # Compare the prediction with the target. Passing the target
            # twice makes the loss independent of `bbox_pred`, so the box
            # head would receive no gradient from it.
            # NOTE(review): assumes compute_iou takes (pred, target,
            # inside_ws, outside_ws) -- confirm against its definition.
            iou, g_iou = compute_iou(
                bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
            if cfg.TRAIN.loss_type == "GIOUloss":
                RCNN_loss_bbox = 1 - g_iou
            elif cfg.TRAIN.loss_type == "IOUloss":
                RCNN_loss_bbox = -iou.log()

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    if self.training:
        return (rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
                RCNN_loss_cls, RCNN_loss_bbox)
    return rois, cls_prob, bbox_pred
def forward(self, base_feat, im_info, gt_boxes):
    """Run the region proposal network, with optional OHEM and IoU losses.

    Training behaviour is driven by ``self.cfg.TRAIN``:

    * ``is_ohem_rpn``: when true, both losses are averaged over the hardest
      entries only instead of over all of them.
    * ``loss_type``: ``"smoothL1loss"`` uses the smooth-L1 helpers;
      ``"IOUloss"`` uses ``-log(iou)`` and ``"GIOUloss"`` uses ``1 - g_iou``,
      both taken from ``compute_iou``. Any other value leaves the box loss
      at 0.

    Args:
        base_feat: Backbone feature map; dimension 0 is the batch size.
        im_info: Per-image metadata, forwarded to the proposal and
            anchor-target layers.
        gt_boxes: Ground-truth boxes; must not be ``None`` in training mode.

    Returns:
        ``(rois, rpn_loss_cls, rpn_loss_box)``. Both losses are 0 in eval
        mode. They are also stored on ``self.rpn_loss_cls`` and
        ``self.rpn_loss_box``.
    """
    cfg = self.cfg
    batch_size = base_feat.size(0)

    # feature map after the conv + relu layer
    rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)

    # rpn classification score
    rpn_cls_score = self.RPN_cls_score(rpn_conv1)
    rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
    rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
    rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

    # rpn offsets to the anchor boxes
    rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

    # proposal layer; its second output is not needed here
    cfg_key = 'TRAIN' if self.training else 'TEST'
    rois, _ = self.RPN_proposal(
        (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))

    self.rpn_loss_cls = 0
    self.rpn_loss_box = 0

    # generate training labels and build the rpn loss
    if self.training:
        assert gt_boxes is not None

        rpn_data = self.RPN_anchor_target(
            (rpn_cls_score.data, gt_boxes, im_info))

        # classification loss over the anchors whose label is not -1
        cls_score_flat = rpn_cls_score_reshape.permute(
            0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
        rpn_label = rpn_data[0].view(batch_size, -1)

        rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
        cls_score_kept = torch.index_select(
            cls_score_flat.view(-1, 2), 0, rpn_keep)
        rpn_label = torch.index_select(
            rpn_label.view(-1), 0, rpn_keep.data)
        rpn_label = Variable(rpn_label.long())

        if cfg.TRAIN.is_ohem_rpn:
            per_anchor_loss = F.cross_entropy(
                cls_score_kept, rpn_label, reduction='none')
            top_k = int(0.125 * cfg.TRAIN.RPN_BATCHSIZE * base_feat.size(0))
            # topk() rejects k larger than the number of kept anchors, and
            # k == 0 would make the mean NaN; keep k inside [1, numel].
            top_k = min(max(top_k, 1), per_anchor_loss.numel())
            _, hard_inds = per_anchor_loss.topk(top_k)
            self.rpn_loss_cls = per_anchor_loss[hard_inds].mean()
        else:
            self.rpn_loss_cls = F.cross_entropy(cls_score_kept, rpn_label)

        (rpn_bbox_targets, rpn_bbox_inside_weights,
         rpn_bbox_outside_weights) = rpn_data[1:]

        # bbox regression loss
        rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
        rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
        rpn_bbox_targets = Variable(rpn_bbox_targets)

        if cfg.TRAIN.loss_type == "smoothL1loss":
            if cfg.TRAIN.is_ohem_rpn:
                # NOTE(review): presumably `_smooth_l1_loss_by_zcc` returns
                # an unreduced loss that can be ranked entry by entry --
                # confirm against its definition.
                box_loss = _smooth_l1_loss_by_zcc(
                    rpn_bbox_pred, rpn_bbox_targets,
                    rpn_bbox_inside_weights, rpn_bbox_outside_weights,
                    sigma=3, dim=[1, 2, 3])
                box_loss = box_loss.view(-1)
                top_k = int(0.125 * rpn_bbox_inside_weights.sum() + 0.5)
                # Same clamping as above: with all-zero inside weights the
                # unclamped k is 0 and the mean would be NaN.
                top_k = min(max(top_k, 1), box_loss.numel())
                _, hard_inds = box_loss.topk(top_k)
                self.rpn_loss_box = box_loss[hard_inds].mean()
            else:
                self.rpn_loss_box = _smooth_l1_loss(
                    rpn_bbox_pred, rpn_bbox_targets,
                    rpn_bbox_inside_weights, rpn_bbox_outside_weights,
                    sigma=3, dim=[1, 2, 3])
        elif "IOUloss" in cfg.TRAIN.loss_type:
            # Compare the prediction with the target. Passing the target
            # twice makes the loss independent of `rpn_bbox_pred`, so the
            # regression branch would receive no gradient from it.
            # NOTE(review): assumes compute_iou takes (pred, target,
            # inside_ws, outside_ws) -- confirm against its definition.
            iou, g_iou = compute_iou(
                rpn_bbox_pred, rpn_bbox_targets,
                rpn_bbox_inside_weights, rpn_bbox_outside_weights)
            if cfg.TRAIN.loss_type == "GIOUloss":
                self.rpn_loss_box = 1 - g_iou
            elif cfg.TRAIN.loss_type == "IOUloss":
                self.rpn_loss_box = -iou.log()

    return rois, self.rpn_loss_cls, self.rpn_loss_box
def forward(self, base_feat, im_info, gt_boxes):
    """Run the region proposal network on a backbone feature map.

    Args:
        base_feat: Backbone feature map; dimension 0 is the batch size.
        im_info: Per-image metadata, forwarded to the proposal and
            anchor-target layers.
        gt_boxes: Ground-truth boxes; must not be ``None`` in training mode.

    Returns:
        ``(rois, rpn_loss_cls, rpn_loss_box)``. Both losses are 0 in eval
        mode. They are also stored on ``self.rpn_loss_cls`` and
        ``self.rpn_loss_box``.
    """
    n_images = base_feat.size(0)

    # shared conv + relu trunk feeding both heads
    trunk = F.relu(self.RPN_Conv(base_feat), inplace=True)

    # classification head: raw scores, then softmax over dimension 1
    raw_scores = self.RPN_cls_score(trunk)
    scores_2ch = self.reshape(raw_scores, 2)
    probs_2ch = F.softmax(scores_2ch, 1)
    probs = self.reshape(probs_2ch, self.nc_score_out)

    # regression head: offsets to the anchor boxes
    box_deltas = self.RPN_bbox_pred(trunk)

    # proposal layer
    mode_key = 'TRAIN' if self.training else 'TEST'
    rois = self.RPN_proposal((probs.data, box_deltas.data, im_info, mode_key))

    self.rpn_loss_cls = 0
    self.rpn_loss_box = 0

    if not self.training:
        return rois, self.rpn_loss_cls, self.rpn_loss_box

    # --- training only: build anchor targets and the two rpn losses ---
    assert gt_boxes is not None

    anchor_data = self.RPN_anchor_target((raw_scores.data, gt_boxes, im_info))

    # classification loss over the anchors whose label is not -1
    scores_flat = scores_2ch.permute(0, 2, 3, 1).contiguous()
    scores_flat = scores_flat.view(n_images, -1, 2)
    labels = anchor_data[0].view(n_images, -1)

    keep = Variable(labels.view(-1).ne(-1).nonzero().view(-1))
    kept_scores = torch.index_select(scores_flat.view(-1, 2), 0, keep)
    kept_labels = torch.index_select(labels.view(-1), 0, keep.data)
    kept_labels = Variable(kept_labels.long())
    self.rpn_loss_cls = F.cross_entropy(kept_scores, kept_labels)
    # Count of kept anchors with a non-zero label; not used by either loss.
    num_fg = torch.sum(kept_labels.data.ne(0))

    # bbox regression loss
    box_targets, inside_w, outside_w = anchor_data[1:]
    inside_w = Variable(inside_w)
    outside_w = Variable(outside_w)
    box_targets = Variable(box_targets)

    self.rpn_loss_box = _smooth_l1_loss(
        box_deltas, box_targets, inside_w, outside_w,
        sigma=3, dim=[1, 2, 3])

    return rois, self.rpn_loss_cls, self.rpn_loss_box
def forward(self, im_data, im_info, gt_boxes):
    """Run the detector on a batch.

    Args:
        im_data: Input image batch; dimension 0 is the batch size.
        im_info: Per-image metadata, forwarded to the RPN.
        gt_boxes: Ground-truth boxes, forwarded to the RPN and (in training)
            to the proposal-target layer.

    Returns:
        Eval mode: ``(rois, cls_prob, bbox_pred)``.
        Training mode: ``(rois, cls_prob, bbox_pred, rpn_loss_cls,
        rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox)``.
    """
    n_images = im_data.size(0)
    info = im_info.data
    boxes = gt_boxes.data

    # backbone feature map, then region proposals from the RPN
    feature_map = self.RCNN_base(im_data)
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(feature_map, info, boxes)

    # training only: match proposals against the ground truth and flatten
    # the per-image targets into one row per roi
    labels = targets = inside_w = outside_w = None
    if self.training:
        rois, labels, targets, inside_w, outside_w = \
            self.RCNN_proposal_target(rois, boxes)
        labels = Variable(labels.view(-1).long())
        targets = Variable(targets.view(-1, targets.size(2)))
        inside_w = Variable(inside_w.view(-1, inside_w.size(2)))
        outside_w = Variable(outside_w.view(-1, outside_w.size(2)))

    rois = Variable(rois)

    # roi pooling followed by the top model
    pooled = self.RCNN_roi_pool(feature_map, rois.view(-1, 5))
    pooled = self._head_to_tail(pooled)

    # box offsets and class probabilities from the two heads
    box_deltas = self.RCNN_bbox_pred(pooled)
    scores = self.RCNN_cls_score(pooled)
    probs = F.softmax(scores, 1)

    rois_per_image = rois.size(1)
    probs_out = probs.view(n_images, rois_per_image, -1)
    deltas_out = box_deltas.view(n_images, rois_per_image, -1)

    if not self.training:
        return rois, probs_out, deltas_out

    # losses are computed on the flat (one row per roi) tensors
    loss_cls = F.cross_entropy(scores, labels)
    loss_box = _smooth_l1_loss(box_deltas, targets, inside_w, outside_w)

    return (rois, probs_out, deltas_out, rpn_loss_cls, rpn_loss_bbox,
            loss_cls, loss_box)