def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """Run the R-CNN box head over ROI-pooled FPN features.

    In training mode returns a dict with the classification and
    regression losses; in inference mode returns decoded boxes
    concatenated with their foreground scores.
    """
    # Drop the coarsest map and flip so levels run p2-p5 for the pooler.
    fpn_fms = fpn_fms[1:][::-1]
    pooled = self.box_pooler(fpn_fms, rcnn_rois)
    flat = torch.flatten(pooled, start_dim=1)
    hidden = F.relu(self.fc1(flat))
    hidden = F.relu(self.fc2(hidden))
    pred_cls = self.pred_cls(hidden)
    pred_delta = self.pred_delta(hidden)

    if not self.training:
        # Inference: decode deltas onto the ROIs and append the
        # foreground (class-1) probability as the score column.
        scores = F.softmax(pred_cls, dim=-1)
        boxes = restore_bbox(rcnn_rois[:, 1:5], pred_delta, True)
        return torch.cat([boxes, scores[:, 1].reshape(-1, 1)], dim=1)

    # Training: regression only over foreground ROIs, normalized by the
    # total number of sampled ROIs; classification over all ROIs.
    labels = labels.flatten()
    fg_inds = torch.nonzero(labels > 0).flatten()
    loss_rcnn_reg = smooth_l1_loss(
        pred_delta[fg_inds], bbox_targets[fg_inds],
        config.rcnn_smooth_l1_beta, 'sum') / pred_delta.shape[0]
    loss_rcnn_cls = F.cross_entropy(pred_cls, labels.long(), reduction="mean")
    return {'loss_rcnn_reg': loss_rcnn_reg, 'loss_rcnn_cls': loss_rcnn_cls}
def forward(self, features, im_info, boxes=None):
    """RPN forward pass over an FPN pyramid.

    Produces per-level objectness scores and box offsets, generates the
    matching anchors, and samples top proposals. In training mode also
    computes the RPN classification / localization losses.
    """
    assert len(features) == config.num_anchor_scales

    # Per-level predictions from the shared RPN conv trunk.
    cls_scores, bbox_offsets = [], []
    for feat in features:
        trunk = F.relu(self.rpn_conv(feat))
        cls_scores.append(self.rpn_cls_score(trunk))
        bbox_offsets.append(self.rpn_bbox_offsets(trunk))

    # Anchors per level: the stride starts at 2**(levels+1) and halves
    # per level, while anchor scales are walked coarsest-first.
    anchors = []
    stride = 2 ** (len(features) + 1)
    scales = config.anchor_scales[::-1]
    for level, feat in enumerate(features):
        anchors.append(generate_anchors_opr(
            feat, stride, [scales[level]],
            config.anchor_aspect_ratios, config.anchor_base_size))
        stride = stride // 2

    # Proposal sampling never needs gradients.
    with torch.no_grad():
        rpn_rois, rpn_rois_inds = find_top_rpn_proposals(
            self.training, bbox_offsets, cls_scores, anchors, im_info)

    if not self.training:
        return rpn_rois, rpn_rois_inds

    with torch.no_grad():
        rpn_labels, rpn_bbox_targets = fpn_anchor_target(
            boxes, im_info, anchors)
    pred_cls_score, pred_bbox_offsets = fpn_rpn_reshape(
        cls_scores, bbox_offsets)

    # Objectness over all non-ignored anchors (label >= 0);
    # localization only over positive anchors (label == 1).
    valid = rpn_labels >= 0
    objectness_loss = F.binary_cross_entropy_with_logits(
        pred_cls_score[valid], rpn_labels[valid].to(torch.float32),
        reduction="sum")
    positive = rpn_labels == 1
    localization_loss = smooth_l1_loss(
        pred_bbox_offsets[positive], rpn_bbox_targets[positive],
        config.rpn_smooth_l1_beta, reduction="sum")

    # Normalize by the fixed per-batch anchor sampling budget.
    normalizer = 1.0 / (config.train_batch_per_gpu * config.num_sample_anchors)
    loss_dict = {
        'loss_rpn_cls': objectness_loss * normalizer,
        'loss_rpn_loc': localization_loss * normalizer,
    }
    return rpn_rois, rpn_rois_inds, loss_dict
def forward(self, features, im_info, boxes=None):
    """RPN forward pass (legacy MegEngine API).

    Returns (rpn_rois, loss_dict) in training mode, or rpn_rois alone
    in inference mode.
    """
    # Per-level objectness scores and box offsets from the shared conv.
    pred_cls_score_list = []
    pred_bbox_offsets_list = []
    for x in features:
        t = F.relu(self.rpn_conv(x))
        pred_cls_score_list.append(self.rpn_cls_score(t))
        pred_bbox_offsets_list.append(self.rpn_bbox_offsets(t))
    # Anchors per level: stride starts at 2**(levels+1) and halves.
    all_anchors_list = []
    fm_stride = 2**(len(features) + 1)
    for fm in features:
        layer_anchors = self.anchors_generator(fm, fm_stride)
        fm_stride = fm_stride // 2
        all_anchors_list.append(layer_anchors)
    # Sample top proposals from the predictions.
    rpn_rois, rpn_probs = find_top_rpn_proposals(
        self.training, pred_bbox_offsets_list, pred_cls_score_list,
        all_anchors_list, im_info)
    if self.training:
        rpn_labels, rpn_bbox_targets = fpn_anchor_target(
            boxes, im_info, all_anchors_list)
        #rpn_labels = rpn_labels.astype(np.int32)
        pred_cls_score, pred_bbox_offsets = fpn_rpn_reshape(
            pred_cls_score_list, pred_bbox_offsets_list)
        # RPN loss: classification over non-ignored anchors (label >= 0).
        valid_masks = rpn_labels >= 0
        valid_inds = mask_to_inds(valid_masks)
        # NOTE(review): `.ai[...]` looks like legacy MegEngine advanced
        # indexing — confirm against the MegEngine version in use.
        objectness_loss = softmax_loss(pred_cls_score.ai[valid_inds],
                                       rpn_labels.ai[valid_inds])
        #objectness_loss = objectness_loss * valid_masks
        # Localization loss is computed densely, then masked to positives.
        pos_masks = rpn_labels > 0
        localization_loss = smooth_l1_loss(pred_bbox_offsets,
                                           rpn_bbox_targets,
                                           config.rpn_smooth_l1_beta)
        localization_loss = localization_loss * pos_masks
        # Both losses are normalized by the number of valid anchors.
        normalizer = 1.0 / (valid_masks.sum())
        loss_rpn_cls = objectness_loss.sum() * normalizer
        loss_rpn_loc = localization_loss.sum() * normalizer
        loss_dict = {}
        loss_dict['loss_rpn_cls'] = loss_rpn_cls
        loss_dict['loss_rpn_loc'] = loss_rpn_loc
        return rpn_rois, loss_dict
    else:
        return rpn_rois
def emd_loss(p_b0, p_c0, p_b1, p_c1, targets, labels):
    """EMD-style set loss for two paired predictions (legacy MegEngine).

    The two branches are interleaved row-wise; per-row classification
    (non-ignored rows) and regression (foreground rows) losses are
    summed, and each pair of rows collapses into one loss value.
    """
    # Interleave branch 0/1 predictions into per-row tensors.
    pred_box = F.concat([p_b0, p_b1], axis=1).reshape(-1, p_b0.shapeof()[-1])
    pred_score = F.concat([p_c0, p_c1], axis=1).reshape(-1, p_c0.shapeof()[-1])
    targets = targets.reshape(-1, 4)
    labels = labels.reshape(-1)
    # fg: label > 0; non-ignored: label >= 0 (label < 0 means ignore).
    fg_masks = F.greater(labels, 0)
    non_ignore_masks = F.greater_equal(labels, 0)
    # loss for regression (computed densely, masked below)
    loss_box_reg = smooth_l1_loss(pred_box, targets,
                                  config.rcnn_smooth_l1_beta)
    # loss for classification
    loss_cls = softmax_loss(pred_score, labels)
    # Mask each term to the rows where it applies, then collapse each
    # (branch0, branch1) pair into a single scalar per ROI.
    loss = loss_cls * non_ignore_masks + loss_box_reg * fg_masks
    loss = loss.reshape(-1, 2).sum(axis=1)
    return loss.reshape(-1, 1)
def emd_loss(p_b0, p_c0, p_b1, p_c1, targets, labels):
    """EMD-style set loss for two paired predictions (PyTorch).

    The two branches are interleaved row-wise; per-row classification
    (ignore_index=-1) plus foreground-only regression are summed, and
    each (branch0, branch1) pair collapses into one loss value.
    """
    # Interleave branch 0/1 predictions into per-row tensors.
    pred_box = torch.cat([p_b0, p_b1], dim=1).reshape(-1, p_b0.shape[-1])
    pred_score = torch.cat([p_c0, p_c1], dim=1).reshape(-1, p_c0.shape[-1])
    targets = targets.reshape(-1, 4)
    labels = labels.flatten()

    fg_inds = torch.nonzero(labels > 0).flatten()
    # Per-row regression loss, only on foreground rows.
    reg = smooth_l1_loss(pred_box[fg_inds], targets[fg_inds],
                         config.rcnn_smooth_l1_beta, 'none').sum(axis=1)
    # Classification everywhere except ignored (-1) rows.
    total = F.cross_entropy(pred_score, labels.long(),
                            reduction='none', ignore_index=-1)
    total[fg_inds] = total[fg_inds] + reg
    # Collapse each pair of rows into a single scalar per ROI.
    return total.reshape(-1, 2).sum(axis=1).reshape(-1, 1)
def emd_loss_multi(p_b_list, p_c_list, targets, labels, top_k):
    """EMD-style set loss generalized to `top_k` paired predictions.

    Same scheme as the pairwise variant: the branches are interleaved
    row-wise, per-row classification (ignore_index=-1) plus
    foreground-only regression are summed, and each group of `top_k`
    rows collapses into one loss value.
    """
    # Interleave all branch predictions into per-row tensors.
    pred_box = torch.cat(p_b_list, dim=1).reshape(-1, p_b_list[0].shape[-1])
    pred_score = torch.cat(p_c_list, dim=1).reshape(-1, p_c_list[0].shape[-1])
    targets = targets.reshape(-1, 4)
    labels = labels.flatten()

    fg_inds = torch.nonzero(labels > 0).flatten()
    # Per-row regression loss, only on foreground rows.
    reg = smooth_l1_loss(pred_box[fg_inds], targets[fg_inds],
                         config.rcnn_smooth_l1_beta, 'none').sum(axis=1)
    # Classification everywhere except ignored (-1) rows.
    total = F.cross_entropy(pred_score, labels.long(),
                            reduction='none', ignore_index=-1)
    total[fg_inds] = total[fg_inds] + reg
    # Collapse each group of top_k rows into a single scalar per ROI.
    return total.reshape(-1, top_k).sum(axis=1).reshape(-1, 1)
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """Class-aware R-CNN box head (legacy MegEngine API).

    Predicts one box delta per class; training selects the delta of the
    ROI's own class, inference decodes every non-background class.
    """
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    fpn_fms = fpn_fms[1:][::-1]
    stride = [4, 8, 16, 32]
    pool_features, rcnn_rois, labels, bbox_targets = roi_pool(
        fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align',
        labels, bbox_targets)
    flatten_feature = F.flatten(pool_features, start_axis=1)
    roi_feature = F.relu(self.fc1(flatten_feature))
    roi_feature = F.relu(self.fc2(roi_feature))
    pred_cls = self.pred_cls(roi_feature)
    pred_delta = self.pred_delta(roi_feature)
    if self.training:
        # loss for regression
        labels = labels.astype(np.int32).reshape(-1)
        # Reduce the per-class deltas to one delta per ROI: pick the
        # delta of the ROI's label (background ROIs index class 0, but
        # their loss is zeroed by pos_masks below).
        pos_masks = labels > 0
        pred_delta = pred_delta.reshape(-1, config.num_classes, 4)
        indexing_label = (labels * pos_masks).reshape(-1, 1)
        indexing_label = indexing_label.broadcast((labels.shapeof()[0], 4))
        pred_delta = F.indexing_one_hot(pred_delta, indexing_label, 1)
        localization_loss = smooth_l1_loss(pred_delta,
                                           bbox_targets,
                                           config.rcnn_smooth_l1_beta)
        # Regression counts only on foreground ROIs.
        localization_loss = localization_loss * pos_masks
        # loss for classification over non-ignored ROIs (label >= 0)
        valid_masks = labels >= 0
        objectness_loss = softmax_loss(pred_cls, labels)
        objectness_loss = objectness_loss * valid_masks
        # Both losses normalized by the number of valid ROIs.
        normalizer = 1.0 / (valid_masks.sum())
        loss_rcnn_cls = objectness_loss.sum() * normalizer
        loss_rcnn_loc = localization_loss.sum() * normalizer
        loss_dict = {}
        loss_dict['loss_rcnn_cls'] = loss_rcnn_cls
        loss_dict['loss_rcnn_loc'] = loss_rcnn_loc
        return loss_dict
    else:
        # Inference: scores/deltas for every non-background class,
        # each ROI replicated once per class before decoding.
        pred_scores = F.softmax(pred_cls)[:, 1:].reshape(-1, 1)
        pred_delta = pred_delta[:, 4:].reshape(-1, 4)
        target_shape = (rcnn_rois.shapeof()[0], config.num_classes - 1, 4)
        base_rois = F.add_axis(rcnn_rois[:, 1:5], 1).broadcast(
            target_shape).reshape(-1, 4)
        pred_bbox = restore_bbox(base_rois, pred_delta, True)
        pred_bbox = F.concat([pred_bbox, pred_scores], axis=1)
        return pred_bbox
def forward(self, fpn_fms, proposals, labels=None, bbox_targets=None):
    """Cascade-stage R-CNN head (legacy MegEngine API).

    Returns refined proposals plus a per-stage loss dict in training
    mode, or refined proposals plus foreground scores at inference.
    """
    # input p2-p5
    fpn_fms = fpn_fms[1:][::-1]
    stride = [4, 8, 16, 32]
    #pool_features = roi_pooler(fpn_fms, proposals, stride, (7, 7), "ROIAlignV2")
    pool_features, proposals, labels, bbox_targets = roi_pool(
        fpn_fms, proposals, stride, (7, 7), 'roi_align',
        labels, bbox_targets)
    flatten_feature = F.flatten(pool_features, start_axis=1)
    roi_feature = F.relu(self.fc1(flatten_feature))
    roi_feature = F.relu(self.fc2(roi_feature))
    pred_cls = self.pred_cls(roi_feature)
    pred_delta = self.pred_delta(roi_feature)
    if self.training:
        # loss for regression
        labels = labels.astype(np.int32).reshape(-1)
        # multiple classes collapsed to one box regressor
        pos_masks = labels > 0
        localization_loss = smooth_l1_loss(
            pred_delta, bbox_targets, config.rcnn_smooth_l1_beta)
        # Regression counts only on foreground ROIs.
        localization_loss = localization_loss * pos_masks
        # loss for classification over non-ignored ROIs (label >= 0)
        valid_masks = labels >= 0
        objectness_loss = softmax_loss(
            pred_cls, labels)
        objectness_loss = objectness_loss * valid_masks
        # Both losses normalized by the number of valid ROIs.
        normalizer = 1.0 / (valid_masks.sum())
        loss_rcnn_cls = objectness_loss.sum() * normalizer
        loss_rcnn_loc = localization_loss.sum() * normalizer
        # Keys are prefixed by the stage name so cascade stages
        # contribute distinct entries to the overall loss dict.
        loss_dict = {}
        loss_dict[self.stage_name + '_cls'] = loss_rcnn_cls
        loss_dict[self.stage_name + '_loc'] = loss_rcnn_loc
        # Refined proposals for the next stage; gradients are blocked
        # so the next stage does not backprop into this one.
        pred_bbox = restore_bbox(proposals[:, 1:5], pred_delta, True)
        pred_proposals = F.zero_grad(F.concat(
            [proposals[:, 0].reshape(-1, 1), pred_bbox], axis=1))
        return pred_proposals, loss_dict
    else:
        # Inference: foreground (class-1) probability as the score.
        pred_scores = F.softmax(pred_cls)[:, 1].reshape(-1, 1)
        pred_bbox = restore_bbox(proposals[:, 1:5], pred_delta, True)
        pred_proposals = F.concat(
            [proposals[:, 0].reshape(-1, 1), pred_bbox], axis=1)
        return pred_proposals, pred_scores
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """Class-agnostic R-CNN box head (legacy MegEngine API).

    Returns a loss dict in training mode, or decoded boxes with
    foreground scores appended at inference.
    """
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    fpn_fms = fpn_fms[1:][::-1]
    stride = [4, 8, 16, 32]
    pool_features, rcnn_rois, labels, bbox_targets = roi_pool(
        fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align',
        labels, bbox_targets)
    flatten_feature = F.flatten(pool_features, start_axis=1)
    roi_feature = F.relu(self.fc1(flatten_feature))
    roi_feature = F.relu(self.fc2(roi_feature))
    pred_cls = self.pred_cls(roi_feature)
    pred_delta = self.pred_delta(roi_feature)
    if self.training:
        # loss for regression
        labels = labels.astype(np.int32).reshape(-1)
        pos_masks = labels > 0
        localization_loss = smooth_l1_loss(
            pred_delta, bbox_targets, config.rcnn_smooth_l1_beta)
        # Regression counts only on foreground ROIs.
        localization_loss = localization_loss * pos_masks
        # loss for classification over non-ignored ROIs (label >= 0)
        valid_masks = labels >= 0
        objectness_loss = softmax_loss(
            pred_cls, labels)
        objectness_loss = objectness_loss * valid_masks
        # Both losses normalized by the number of valid ROIs.
        normalizer = 1.0 / (valid_masks.sum())
        loss_rcnn_cls = objectness_loss.sum() * normalizer
        loss_rcnn_loc = localization_loss.sum() * normalizer
        loss_dict = {}
        loss_dict['loss_rcnn_cls'] = loss_rcnn_cls
        loss_dict['loss_rcnn_loc'] = loss_rcnn_loc
        return loss_dict
    else:
        # Inference: decode deltas onto the ROIs and append the
        # foreground (class-1) probability as the score column.
        pred_scores = F.softmax(pred_cls)
        pred_bbox = restore_bbox(rcnn_rois[:, 1:5], pred_delta, True)
        pred_bbox = F.concat(
            [pred_bbox, pred_scores[:, 1].reshape(-1, 1)], axis=1)
        return pred_bbox
def emd_loss(p_b0, p_c0, p_b1, p_c1, targets, labels):
    """EMD-style set loss with class-aware box selection (legacy MegEngine).

    Like the class-agnostic variant, but each row predicts one delta per
    class and the delta of the row's own label is selected before the
    regression loss.
    """
    # Interleave branch 0/1 predictions into per-row tensors.
    pred_box = F.concat([p_b0, p_b1], axis=1).reshape(-1, p_b0.shapeof()[-1])
    pred_box = pred_box.reshape(-1, config.num_classes, 4)
    pred_score = F.concat([p_c0, p_c1], axis=1).reshape(-1, p_c0.shapeof()[-1])
    targets = targets.reshape(-1, 4)
    labels = labels.reshape(-1).astype(np.int32)
    # fg: label > 0; non-ignored: label >= 0 (label < 0 means ignore).
    fg_masks = F.greater(labels, 0)
    non_ignore_masks = F.greater_equal(labels, 0)
    # Reduce per-class deltas to one per row: pick the delta of the
    # row's label (background rows index class 0, but their regression
    # loss is zeroed by fg_masks below).
    indexing_label = (labels * fg_masks).reshape(-1, 1)
    indexing_label = indexing_label.broadcast((labels.shapeof()[0], 4))
    pred_box = F.indexing_one_hot(pred_box, indexing_label, 1)
    # loss for regression (computed densely, masked below)
    loss_box_reg = smooth_l1_loss(
        pred_box, targets, config.rcnn_smooth_l1_beta)
    # loss for classification
    loss_cls = softmax_loss(pred_score, labels)
    # Mask each term to the rows where it applies, then collapse each
    # (branch0, branch1) pair into a single scalar per ROI.
    loss = loss_cls * non_ignore_masks + loss_box_reg * fg_masks
    loss = loss.reshape(-1, 2).sum(axis=1)
    return loss.reshape(-1, 1)