def __call__(self, proposals, mask_logits, char_mask_logits, mask_targets, char_mask_targets, char_mask_weights):
    """
    Compute the global text-mask loss and the per-character mask loss.

    Arguments:
        proposals (list[BoxList]): sampled proposals (kept for API parity)
        mask_logits (Tensor): predicted binary mask logits, [N, 1, H, W]
        char_mask_logits (Tensor): per-character class logits
        mask_targets (list[Tensor]): per-image binary mask targets
        char_mask_targets (list[Tensor]): per-image character-index targets
        char_mask_weights (list[Tensor]): per-class weights for the char loss

    Return:
        (mask_loss, char_mask_loss): scalar loss tensors
    """
    mask_targets = cat(mask_targets, dim=0)
    char_mask_targets = cat(char_mask_targets, dim=0)
    char_mask_weights = cat(char_mask_weights, dim=0)
    # average the per-image weight vectors into a single class-weight vector
    char_mask_weights = char_mask_weights.mean(dim=0)
    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately.
    if mask_targets.numel() == 0 or char_mask_targets.numel() == 0:
        # BUGFIX: the char zero-loss must be derived from the *logits*
        # (the original used char_mask_targets.sum() * 0, which produces a
        # constant detached from the autograd graph).
        return mask_logits.sum() * 0, char_mask_logits.sum() * 0
    mask_loss = F.binary_cross_entropy_with_logits(
        mask_logits.squeeze(dim=1), mask_targets)
    if self.use_weighted_loss:
        char_mask_loss = F.cross_entropy(char_mask_logits, char_mask_targets,
                                         char_mask_weights, ignore_index=-1)
    else:
        char_mask_loss = F.cross_entropy(char_mask_logits, char_mask_targets,
                                         ignore_index=-1)
    return mask_loss, char_mask_loss
def __call__(self, predictions):
    """
    Compute the Fast R-CNN losses for the proposals cached by `subsample`.

    Arguments:
        predictions: box-head outputs forwarded to `self.loss_computation`.

    Returns:
        Whatever `self.loss_computation` returns (classification/box losses).

    Raises:
        RuntimeError: if `subsample` was not called first.
    """
    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")
    cached = self._proposals
    labels = cat([p.get_field("labels") for p in cached], dim=0)
    regression_targets = cat(
        [p.get_field("regression_targets") for p in cached], dim=0)
    return self.loss_computation(predictions, labels, regression_targets)
def concat_box_prediction_layers(box_cls, box_regression):
    """
    Flatten per-feature-level classification and regression outputs into two
    2-D tensors that line up with the concatenated label layout.

    Arguments:
        box_cls (list[Tensor]): per level, shape (N, A*C, H, W).
        box_regression (list[Tensor]): per level, shape (N, A*4, H, W).

    Returns:
        (box_cls, box_regression): tensors of shape (-1, C) and (-1, 4).
    """
    flattened_cls = []
    flattened_reg = []
    # Permute each level to (N, H*W*A, C) / (N, H*W*A, 4) so that the order
    # matches how the labels were generated (all levels concatenated).
    for cls_level, reg_level in zip(box_cls, box_regression):
        N, AxC, H, W = cls_level.shape
        A = reg_level.shape[1] // 4
        C = AxC // A
        flattened_cls.append(permute_and_flatten(cls_level, N, A, C, H, W))
        flattened_reg.append(permute_and_flatten(reg_level, N, A, 4, H, W))
    # Concatenate along the per-level dimension, then collapse to 2-D.
    box_cls = cat(flattened_cls, dim=1).reshape(-1, C)
    box_regression = cat(flattened_reg, dim=1).reshape(-1, 4)
    return box_cls, box_regression
def __call__(self, proposals, mask_logits, targets):
    """
    Mask head loss: binary cross-entropy over the positive proposals.

    Arguments:
        proposals (list[BoxList])
        mask_logits (Tensor)
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    """
    labels, mask_targets = self.prepare_targets(proposals, targets)
    labels = cat(labels, dim=0)
    mask_targets = cat(mask_targets, dim=0)
    pos = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels[pos]
    # BCE-with-logits reduces with torch.mean, which rejects empty tensors;
    # return a graph-connected zero instead.
    if mask_targets.numel() == 0:
        return mask_logits.sum() * 0
    return F.binary_cross_entropy_with_logits(
        mask_logits[pos, pos_labels], mask_targets)
def binary_dice_loss_with_logits(mask_logits, mask_targets):
    r"""Measure the binary Dice loss between targets and output logits.

    Args:
        mask_logits: Tensor of arbitrary shape (raw logits).
        mask_targets: Tensor of the same shape as the input.

    Raises:
        ValueError: if the two tensors have different shapes.

    Examples::

        >>> input = torch.randn(3, requires_grad=True)
        >>> target = torch.empty(3).random_(2)
        >>> loss = binary_dice_loss_with_logits(input, target)
        >>> loss.backward()
    """
    mask_logits = mask_logits.sigmoid()
    if mask_logits.size() != mask_targets.size():
        raise ValueError("Mask_logits size ({}) must be the same as mask_targets size ({})".format(mask_logits.size(), mask_targets.size()))
    if mask_logits.dim() == 3:
        # Collapse the batch dimension. (Equivalent to the original per-slice
        # concatenation; note the Dice sums below are global, so this reshape
        # does not change the result.)
        mask_logits = mask_logits.reshape(-1, mask_logits.size(-1))
        mask_targets = mask_targets.reshape(-1, mask_targets.size(-1))
    # Smoothing term avoids division by zero and stabilizes tiny masks.
    smooth = 1
    intersection = (mask_logits * mask_targets).sum()
    union = mask_logits.sum() + mask_targets.sum()
    return 1 - (2. * intersection + smooth) / (union + smooth)
def obj_classification_loss(self, proposals, class_logits):
    """
    Object classification loss: cross-entropy between the concatenated
    class logits and the proposals' ground-truth labels.

    Arguments:
        proposals (list): each item exposes get_field("gt_labels").
        class_logits (list[Tensor]): per-image logits, concatenated here.

    Returns:
        Tensor: scalar cross-entropy loss.
    """
    class_logits = cat(class_logits, dim=0)
    labels = cat(
        [proposal.get_field("gt_labels") for proposal in proposals], dim=0)
    # (removed an unused `device` local present in the original)
    return F.cross_entropy(class_logits, labels)
def __call__(self, proposals, keypoint_logits):
    """
    Keypoint head loss: softmax cross-entropy over heatmap locations,
    computed only on valid (visible) keypoints.

    Arguments:
        proposals (list[BoxList]): each carries a "keypoints" field.
        keypoint_logits (Tensor): [N, K, H, W] predicted heatmaps.

    Returns:
        Tensor: scalar keypoint loss (graph-connected zero if no valid targets).
    """
    heatmaps = []
    valid = []
    for proposals_per_image in proposals:
        kp = proposals_per_image.get_field("keypoints")
        heatmaps_per_image, valid_per_image = project_keypoints_to_heatmap(
            kp, proposals_per_image, self.discretization_size)
        heatmaps.append(heatmaps_per_image.view(-1))
        valid.append(valid_per_image.view(-1))
    keypoint_targets = cat(heatmaps, dim=0)
    valid = cat(valid, dim=0).to(dtype=torch.uint8)
    valid = torch.nonzero(valid, as_tuple=False).squeeze(1)
    # cross_entropy cannot reduce over empty tensors, so handle separately.
    # BUGFIX: the original returned torch.zeros_like(keypoint_logits.sum()),
    # which detaches the zero loss from the autograd graph (and evaluated
    # .sum() twice); multiply the real sum by 0 to keep gradients flowing.
    if keypoint_targets.numel() == 0 or len(valid) == 0:
        return keypoint_logits.sum() * 0
    N, K, H, W = keypoint_logits.shape
    keypoint_logits = keypoint_logits.view(N * K, H * W)
    return F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid])
def __call__(self, proposals, trans_pred, targets, loss_type):
    """
    Translation-regression loss over positive proposals.

    :param proposals: (list[BoxList])
    :param trans_pred: predicted translations for the positive proposals
    :param targets: (list[BoxList])
    :param loss_type: one of 'MSE', 'L1', 'HUBER'
    :return: (loss_trans, dis_trans) — the regression loss and the mean
             Euclidean distance (detached, for logging)
    :raises ValueError: on an unsupported loss_type
    """
    labels, label_trans = self.prepare_targets(proposals, targets)
    labels = cat(labels, dim=0)
    label_trans = cat(label_trans, dim=0)
    positive_inds = torch.nonzero(labels > 0).squeeze(1)
    device_id = label_trans.get_device()
    pos_targets = label_trans[positive_inds]
    if loss_type == 'MSE':
        loss_trans = nn.MSELoss()(trans_pred, pos_targets)
    elif loss_type == 'L1':
        loss_trans = nn.L1Loss()(trans_pred, pos_targets)
    elif loss_type == 'HUBER':
        beta = self.cfg.MODEL.TRANS_HEAD.TRANS_HUBER_THRESHOLD
        loss_trans = huber_loss(trans_pred, pos_targets, device_id, beta)
    else:
        # BUGFIX: an unknown loss_type previously fell through and raised a
        # confusing NameError on `loss_trans`; fail fast with a clear message.
        raise ValueError("unsupported loss_type: {}".format(loss_type))
    # Mean Euclidean distance between prediction and target (metric only).
    dis_trans = torch.mean(
        torch.norm(trans_pred - pos_targets, dim=1)).detach()
    return loss_trans, dis_trans
def __call__(self, proposals, keypoint_logits):
    """
    Keypoint loss computed as a scaled MSE between predicted and target
    heatmaps over the valid keypoints.

    Arguments:
        proposals (list[BoxList]): each carries a "keypoints" field.
        keypoint_logits (Tensor): [N, K, H, W] predicted heatmaps
            (N persons, K keypoints, HxW feature map).

    Returns:
        Tensor: scalar loss (graph-connected zero if nothing is valid).
    """
    target_maps = []
    valid_flags = []
    for props in proposals:
        kp = props.get_field("keypoints")
        hm, ok = project_keypoints_to_heatmap(
            kp, props, self.discretization_size)
        target_maps.append(hm.view(-1))
        valid_flags.append(ok.view(-1))
    # [N * K]: N persons x K keypoints
    keypoint_targets = cat(target_maps, dim=0)
    valid = cat(valid_flags, dim=0).to(dtype=torch.bool)
    valid = torch.nonzero(valid).squeeze(1)
    # Empty tensors break the mean reduction, so handle them separately.
    if keypoint_targets.numel() == 0 or len(valid) == 0:
        return keypoint_logits.sum() * 0
    N, K, H, W = keypoint_logits.shape
    keypoint_logits = keypoint_logits.view(N * K, H * W)
    keypoint_targets = keypoint_targets.view(N * K, H * W)
    # Scaled mean-squared error restricted to the valid keypoints.
    diff = keypoint_logits[valid] - keypoint_targets[valid]
    return torch.mean(diff ** 2) * 1000
def __call__(self, class_logits, box_regression, closeup_logits, closeup_labels):
    """
    Computes the loss for Faster R-CNN plus an auxiliary "closeup"
    classification loss.

    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])
        closeup_logits (list[Tensor] or None): optional auxiliary logits
        closeup_labels (Tensor): labels for the closeup batch

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
        extra_classification_loss (Tensor or None)
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)
    device = class_logits.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    proposals = self._proposals

    labels = cat([proposal.get_field("labels") for proposal in proposals],
                 dim=0)
    regression_targets = cat([
        proposal.get_field("regression_targets") for proposal in proposals
    ], dim=0)

    classification_loss = F.cross_entropy(class_logits, labels)

    if closeup_logits is not None:
        # Replicate the label tensor once per closeup entry, then concatenate
        # the logits; the /10 down-weights this auxiliary loss.
        closeup_labels = closeup_labels.repeat(len(closeup_logits))
        closeup_logits = torch.cat(closeup_logits, dim=0)
        extra_classification_loss = F.cross_entropy(
            closeup_logits, closeup_labels) / 10
    else:
        extra_classification_loss = None

    # get indices that correspond to the regression targets for
    # the corresponding ground truth labels, to be used with
    # advanced indexing
    sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[sampled_pos_inds_subset]
    if self.cls_agnostic_bbox_reg:
        map_inds = torch.tensor([4, 5, 6, 7], device=device)
    else:
        # Each class owns 4 regression channels; select the GT class's block.
        map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3],
                                                          device=device)

    # Regression loss on positives only, normalized by total sample count.
    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds_subset[:, None], map_inds],
        regression_targets[sampled_pos_inds_subset],
        size_average=False,
        beta=1,
    )
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss, extra_classification_loss
def __call__(self, proposals, mask_logits, targets):
    """
    Mask R-CNN mask loss.

    Each positive RoI is matched (by IoU, inside `prepare_targets`) to a
    ground-truth box; that box's mask is cropped and resized to the logit
    resolution (MxM) and compared against the predicted mask of the RoI's
    ground-truth class with sigmoid + binary cross-entropy.

    Arguments:
        proposals (list[BoxList]): per-image RoIs (positives only).
        mask_logits (Tensor): [num_pos_roi, num_classes, M, M].
        targets (list[BoxList]): ground-truth boxes with mask annotations.

    Return:
        mask_loss (Tensor): scalar loss.
    """
    labels, mask_targets = self.prepare_targets(proposals, targets)
    labels = cat(labels, dim=0)              # [num_roi]
    mask_targets = cat(mask_targets, dim=0)  # [num_roi, M, M]

    pos = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels[pos]

    # BCE-with-logits uses torch.mean, which rejects empty tensors.
    if mask_targets.numel() == 0:
        return mask_logits.sum() * 0

    # Select each positive RoI's mask channel for its GT class, then apply
    # sigmoid + cross-entropy in a single numerically stable call.
    return F.binary_cross_entropy_with_logits(
        mask_logits[pos, pos_labels], mask_targets)
def reduced_bbox_result(self, box_regression, proposals):
    """
    Decode predicted and target boxes for positive proposals only, returning
    them as per-image BoxLists.

    Arguments:
        box_regression (list[Tensor]): class-specific box deltas per level.
        proposals (list[BoxList]): sampled proposals carrying "labels" and
            "regression_targets" fields.

    Returns:
        (result, box_result): two lists of BoxList — decoded predictions and
        decoded regression targets, split per image (positives only).
    """
    box_regression = cat(box_regression, dim=0)
    device = box_regression.device
    labels = cat([proposal.get_field("labels") for proposal in proposals],
                 dim=0)
    regression_targets = cat([
        proposal.get_field("regression_targets") for proposal in proposals
    ], dim=0)
    # Positive proposals and the 4 regression channels of their GT class.
    sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[sampled_pos_inds_subset]
    map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3],
                                                      device=device)
    image_shapes = [box.size for box in proposals]
    boxes_per_image = [len(box) for box in proposals]
    concat_boxes = torch.cat([a.bbox for a in proposals], dim=0)
    # Prefix sums over per-image counts: prefix_sum_boxes[j] is the exclusive
    # upper bound of image j's indices in the concatenated proposal tensor.
    prefix_sum_boxes = [boxes_per_image[0]]
    for box_per_images in boxes_per_image[1:]:
        prefix_sum_boxes.append(box_per_images + prefix_sum_boxes[-1])
    # Count how many positive indices land in each image's range (two-pointer
    # sweep; relies on sampled_pos_inds_subset being sorted ascending).
    reduced_boxes_per_image = [0] * len(prefix_sum_boxes)
    i, j = 0, 0
    while i < len(sampled_pos_inds_subset):
        if sampled_pos_inds_subset[i] < prefix_sum_boxes[j]:
            reduced_boxes_per_image[j] += 1
            i += 1
        else:
            j += 1
    # Decode class-specific predictions against the positive proposals.
    proposals = self.box_coder.decode(
        box_regression[sampled_pos_inds_subset[:, None], map_inds],
        concat_boxes[sampled_pos_inds_subset])
    proposals = proposals.split(reduced_boxes_per_image, dim=0)
    # Decode the regression targets the same way (gives back the GT boxes).
    box_targets = self.box_coder.decode(
        regression_targets[sampled_pos_inds_subset],
        concat_boxes[sampled_pos_inds_subset])
    box_targets = box_targets.split(reduced_boxes_per_image, dim=0)
    # Wrap both in clipped BoxLists, per image.
    result = []
    for boxes, image_shape in zip(proposals, image_shapes):
        boxlist = BoxList(boxes, image_shape, mode="xyxy")
        boxlist = boxlist.clip_to_image(remove_empty=False)
        result.append(boxlist)
    box_result = []
    for boxes, image_shape in zip(box_targets, image_shapes):
        boxlist = BoxList(boxes, image_shape, mode="xyxy")
        boxlist = boxlist.clip_to_image(remove_empty=False)
        box_result.append(boxlist)
    return result, box_result
def __call__(self, proposals, mask_logits, targets):
    """
    Mask loss for a sampler that keeps only positive proposals.

    Arguments:
        proposals (list[BoxList])
        mask_logits (Tensor): [N, C, H, W]
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    """
    labels, mask_targets = self.prepare_targets(proposals, targets)
    # All mask samples are positives already, so no filtering is needed and
    # the concatenated labels are directly the per-RoI class labels.
    labels_pos = cat(labels, dim=0)
    mask_targets = cat(mask_targets, dim=0)

    # torch.mean inside BCE-with-logits rejects empty tensors.
    if mask_targets.numel() == 0:
        return mask_logits.sum() * 0

    N, C, H, W = mask_logits.shape
    row_inds = torch.arange(0, N, device=labels_pos.device)
    # Flatten (roi, class) into a single index and gather one HxW map per RoI.
    flat_inds = (row_inds * C + labels_pos).view(-1)
    sampled = mask_logits.view(-1, H, W).index_select(0, flat_inds).view(N, H, W)
    return F.binary_cross_entropy_with_logits(sampled, mask_targets)
def __call__(self, proposals, word_logits, targets):
    """
    CTC loss for word recognition over positive text proposals.

    Arguments:
        proposals (list[BoxList])
        word_logits (Tensor): per-timestep character logits; indexed as
            [time, roi, alphabet] below (dim 1 selects proposals, softmax on
            dim 2) — inferred from the indexing, confirm against the head.
        targets (list[BoxList])

    Returns:
        Tensor: scalar CTC loss, normalized by sequence length and batch
        size (graph-connected zero when no usable targets exist).
    """
    labels, words, word_lens = self.prepare_targets(proposals, targets)
    labels = cat(labels, dim=0)
    word_targets = cat(words, dim=0)
    word_lens = cat(word_lens, dim=0)
    ########################## positive samples ###########################
    positive_inds = torch.nonzero(labels > 0).squeeze(1)
    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if word_targets.numel() == 0:
        return word_logits.sum() * 0
    # Log-probabilities over the alphabet, restricted to positive proposals.
    pos_logits = word_logits[:, positive_inds].log_softmax(2)
    pos_wlens = word_lens[positive_inds]
    pos_target = word_targets[positive_inds]
    # Keep only words shorter than 18 characters.
    limited_ind = pos_wlens < 18
    word_lens_lim = pos_wlens[limited_ind]
    word_targets_lim = pos_target[limited_ind]
    pos_logits_lim = pos_logits[:, limited_ind]
    if word_targets_lim.numel() == 0:
        return pos_logits.sum() * 0
    batch_size = pos_logits_lim.size()[1]
    # Every sample uses the full logit sequence length as its input length.
    predicted_length = torch.tensor([pos_logits_lim.size(0)] * batch_size)
    # Flatten the targets and drop the zero padding: CTC expects the target
    # sequences concatenated back to back.
    word_targets_flatten = word_targets_lim.view(-1)
    positive_w_inds = torch.nonzero(word_targets_flatten > 0).squeeze(1)
    word_targets_flatten = word_targets_flatten[positive_w_inds]
    if _DEBUG:
        # Periodically greedy-decode and print prediction vs ground truth.
        self.show_cnt += 1
        if self.show_cnt % 100 == 0:
            pos_logits_show = pos_logits_lim.permute(1, 0, 2)
            pos_value, pos_inds = pos_logits_show.max(2)
            predict_seq = pos_inds.data.cpu().numpy()
            word_targets_np = word_targets_lim.data.cpu().numpy()
            for a in range(predict_seq.shape[0]):
                pred_str = ''
                gt_str = ''
                for b in range(predict_seq.shape[1]):
                    pred_str += self.alphabet[predict_seq[a, b]]
                for c in range(word_targets_np.shape[1]):
                    if word_targets_np[a, c] != 0:
                        gt_str += self.alphabet[int(word_targets_np[a, c])]
                print('lstr:', "|" + pred_str + "|", "|" + gt_str + "|")
    # Normalize the summed CTC loss by sequence length and batch size.
    return self.ctc_loss(
        pos_logits_lim, word_targets_flatten.long(), predicted_length.long(),
        word_lens_lim.long()).sum() / pos_logits.size()[0] / batch_size
def __call__(self, proposals, keypoint_logits):
    """
    Keypoint head loss: cross-entropy over the flattened heatmap locations,
    restricted to valid keypoints.

    Arguments:
        proposals (list[BoxList]): each carries a "keypoints" field.
        keypoint_logits (Tensor): [N, K, H, W] predicted heatmaps.

    Returns:
        Tensor: scalar loss (graph-connected zero if nothing is valid).
    """
    target_maps = []
    valid_flags = []
    for props in proposals:
        kp = props.get_field("keypoints")
        hm, ok = project_keypoints_to_heatmap(
            kp, props, self.discretization_size)
        target_maps.append(hm.view(-1))
        valid_flags.append(ok.view(-1))
    keypoint_targets = cat(target_maps, dim=0)
    valid = cat(valid_flags, dim=0).to(dtype=torch.uint8)
    valid = torch.nonzero(valid).squeeze(1)
    # cross_entropy's mean reduction rejects empty tensors, so handle the
    # no-valid-keypoints case separately.
    if keypoint_targets.numel() == 0 or len(valid) == 0:
        return keypoint_logits.sum() * 0
    N, K, H, W = keypoint_logits.shape
    keypoint_logits = keypoint_logits.view(N * K, H * W)
    return F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid])
def __call__(self, proposals, ke_logits_x, ke_logits_y, targets):
    """
    Edge loss computed separately along x and y directions.

    Arguments:
        proposals (list[BoxList])
        ke_logits_x (Tensor): x-direction edge logits
        ke_logits_y (Tensor): y-direction edge logits
        targets (list[BoxList])

    Return:
        (mask_loss, mask_loss_x, mask_loss_y): scalar tensors; mask_loss is
        the sum of the two directional losses.
    """
    labels, mask_targets = self.prepare_targets(proposals, targets)
    labels = cat(labels, dim=0)
    mask_targets = cat(mask_targets, dim=0)
    positive_inds = torch.nonzero(labels > 0).squeeze(1)
    if mask_targets.numel() == 0:
        # BUGFIX: the original returned a bare int 0 here while the normal
        # path returns a 3-tuple, breaking callers that unpack three values;
        # it was also detached from the autograd graph. Return three
        # graph-connected zero tensors instead.
        zero = (ke_logits_x.sum() + ke_logits_y.sum()) * 0
        return zero, zero, zero
    sb, sh, sw = mask_targets.shape
    target_4d = mask_targets.view([sb, 1, sh, sw])
    mask_loss_x = edge_loss(
        ke_logits_x[positive_inds, 0].view([sb, 1, sh, sw]), target_4d)
    mask_loss_y = edge_loss(
        ke_logits_y[positive_inds, 0].view([sb, 1, sh, sw]), target_4d)
    mask_loss = mask_loss_x + mask_loss_y
    return mask_loss, mask_loss_x, mask_loss_y
def __call__(self, class_logits):
    """
    Relation-pair classification loss with background re-weighting.

    This requires that the subsample method has been called beforehand
    (it caches `_proposal_pairs`).

    Arguments:
        class_logits (list[Tensor])

    Returns:
        classification_loss (Tensor)
    """
    class_logits = cat(class_logits, dim=0)
    device = class_logits.device  # kept for parity with sibling losses
    if not hasattr(self, "_proposal_pairs"):
        raise RuntimeError("subsample needs to be called before")
    pairs = self._proposal_pairs
    labels = cat([p.get_field("labels") for p in pairs], dim=0)
    num_fg = len(labels.nonzero())
    num_bg = labels.shape[0] - num_fg
    # Weight the background class (index 0) by the fg:bg ratio so that the
    # dominant background does not swamp the loss.
    ce_weights = labels.new(class_logits.size(1)).fill_(1).float()
    ce_weights[0] = float(num_fg) / (num_bg + 1e-5)
    return F.cross_entropy(class_logits, labels, weight=ce_weights)
def __call__(self, class_logits, box_regression):
    """
    Computes the loss for Faster R-CNN using index_select-based gathering.

    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)
    device = class_logits.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    proposals = self._proposals
    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0)

    classification_loss = F.cross_entropy(class_logits, labels)

    # Indices of positives and the 4 regression channels of their GT class.
    pos_inds = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels.index_select(0, pos_inds)
    if self.cls_agnostic_bbox_reg:
        map_inds = torch.tensor([4, 5, 6, 7], device=device)
    else:
        map_inds = 4 * pos_labels[:, None] + torch.tensor([0, 1, 2, 3],
                                                          device=device)

    # Flattened-index equivalent of box_regression[pos_inds[:, None], map_inds].
    flat_inds = ((pos_inds[:, None]) * box_regression.size(1) + map_inds).view(-1)
    box_regression_sampled = box_regression.view(-1).index_select(
        0, flat_inds).view(map_inds.shape[0], map_inds.shape[1])
    regression_targets_sampled = regression_targets.index_select(0, pos_inds)

    box_loss = smooth_l1_loss(
        box_regression_sampled,
        regression_targets_sampled,
        size_average=False,
        beta=1,
    )
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss
def compute_multi_modal(targets, word2vec, devices):
    """
    Build semantic (word2vec) and spatial features for subject/object pairs.

    Arguments:
        targets (list): each item exposes get_field(...) for subject/object
            categories and bounding boxes, plus a .size (W, H) attribute.
        word2vec (dict): key 'a' holds the embedding matrix, indexed by
            zero-based category id.
        devices: torch device for the constant tensors.

    Returns:
        (category_info, spatial_info): float tensors; category_info is the
        concatenated subject+object embeddings, spatial_info is a 14-dim
        geometric descriptor per pair, L2-normalized row-wise.
    """
    word2vec = torch.tensor(word2vec['a']).to(devices)
    category_info = []
    spatial_info = []
    # Guards divisions and log() against degenerate (zero-size) boxes.
    eps = torch.tensor(10e-16).to(devices)
    for i, target in enumerate(targets):
        # Categories are stored 1-based; shift to 0-based embedding indices.
        subject_category = (target.get_field('subject_category') - 1)
        object_category = (target.get_field('object_category') - 1)
        sub = word2vec[subject_category]
        ob = word2vec[object_category]
        # Concatenated subject/object embeddings for each pair.
        w2v = torch.cat((sub, ob), 1)
        category_info.append(w2v)
        subject_boundingboxes = target.get_field('subject_boundingboxes')
        object_boundingboxes = target.get_field('object_boundingboxes')
        W, H = target.size
        W = float(W)
        H = float(H)
        # Box corners as column vectors: s = min corner, m = max corner.
        xs_o = object_boundingboxes[:, 0].view(-1, 1)
        xm_o = object_boundingboxes[:, 2].view(-1, 1)
        ys_o = object_boundingboxes[:, 1].view(-1, 1)
        ym_o = object_boundingboxes[:, 3].view(-1, 1)
        xs_s = subject_boundingboxes[:, 0].view(-1, 1)
        xm_s = subject_boundingboxes[:, 2].view(-1, 1)
        ys_s = subject_boundingboxes[:, 1].view(-1, 1)
        ym_s = subject_boundingboxes[:, 3].view(-1, 1)
        # x_0..x_4: subject box normalized by image size, plus relative area.
        x_0 = xs_s / W
        x_1 = ys_s / H
        x_2 = xm_s / W
        x_3 = ym_s / H
        x_4 = ((ym_s - ys_s) * (xm_s - xs_s)) / (W * H)
        # x_5..x_9: object box normalized likewise.
        x_5 = xs_o / W
        x_6 = ys_o / H
        x_7 = xm_o / W
        x_8 = ym_o / H
        x_9 = ((ym_o - ys_o) * (xm_o - xs_o)) / (W * H)
        # Clamped widths/heights avoid divide-by-zero and log(0) below.
        w_o = torch.max(xm_o - xs_o, eps)
        h_o = torch.max(ym_o - ys_o, eps)
        w_s = torch.max(xm_s - xs_s, eps)
        h_s = torch.max(ym_s - ys_s, eps)
        # x_10..x_13: subject offset and log size ratios relative to object.
        x_10 = (xs_s - xs_o) / w_o
        x_11 = (ys_s - ys_o) / h_o
        x_12 = torch.log(w_s / w_o)
        x_13 = torch.log(h_s / h_o)
        spatial = torch.cat((x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7, x_8,
                             x_9, x_10, x_11, x_12, x_13), dim=1)
        spatial_info.append(spatial)
    category_info = cat(category_info, dim=0).float()
    spatial_info = cat(spatial_info, dim=0)
    # Row-wise L2 normalization of the spatial descriptor.
    spatial_info = torch.div(
        spatial_info, torch.norm(spatial_info, dim=1).unsqueeze(-1)).float()
    return category_info, spatial_info
def __call__(self, class_logits, box_regression):
    """
    Fast R-CNN losses re-weighted per Fitzpatrick category via augment_loss.

    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)
    device = class_logits.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    proposals = self._proposals
    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0)
    fitz_categories = cat(
        [p.get_field("fitz_categories") for p in proposals], dim=0)

    # Per-sample CE (no reduction) so augment_loss can re-weight before
    # averaging.
    classification_loss = F.cross_entropy(class_logits, labels,
                                          reduction="none")
    classification_loss = self.augment_loss(classification_loss,
                                            fitz_categories, use_mean=True)

    # Positives and the 4 regression channels of their GT class.
    pos = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels[pos]
    map_inds = 4 * pos_labels[:, None] + \
        torch.tensor([0, 1, 2, 3], device=device)

    box_loss = smooth_l1_loss(
        box_regression[pos[:, None], map_inds],
        regression_targets[pos],
        size_average=None,
        beta=1,
    )
    box_loss = self.augment_loss(box_loss, fitz_categories[pos])
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss
def __call__(self, class_logits, box_regression):
    """
    Standard Fast R-CNN classification + box-regression losses.

    This requires that the subsample method has been called beforehand
    (it caches `_proposals` with "labels" and "regression_targets" fields).

    Arguments:
        class_logits (list[Tensor]): [num_roi, num_classes] per level.
        box_regression (list[Tensor]): [num_roi, num_classes * 4] per level.

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)
    device = class_logits.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    proposals = self._proposals
    # Labels were attached when each proposal was matched to a target.
    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    # Encoded regression targets (t_x, t_y, t_w, t_h) per proposal.
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0)

    # Combines log_softmax and NLL in one call.
    classification_loss = F.cross_entropy(class_logits, labels)

    # Positive sample indices and their ground-truth classes.
    pos = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels[pos]
    if self.cls_agnostic_bbox_reg:
        map_inds = torch.tensor([4, 5, 6, 7], device=device)
    else:
        # The head predicts one box per class; select the GT class's block.
        map_inds = 4 * pos_labels[:, None] + torch.tensor([0, 1, 2, 3],
                                                          device=device)

    # Regression loss on positives only, normalized by total sample count.
    box_loss = smooth_l1_loss(
        box_regression[pos[:, None], map_inds],
        regression_targets[pos],
        size_average=False,
        beta=1,
    )
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss
def __call__(self, class_logits, box_regression):
    """
    Fast R-CNN losses where the classification term is scaled by
    `guided_loss_weighter`; returns zero losses when there are no positives.

    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)
    device = class_logits.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    proposals = self._proposals
    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0)

    pos_inds = torch.nonzero(labels > 0).squeeze(1)
    # Guard clause: with no positive samples, return a pair of zeros.
    if pos_inds.numel() == 0:
        zero = pos_inds.new_tensor([0]).float()
        return zero, zero

    classification_loss = F.cross_entropy(class_logits, labels)

    if self.cls_agnostic_bbox_reg:
        map_inds = torch.tensor([4, 5, 6, 7], device=device)
    else:
        map_inds = 4 * labels[pos_inds][:, None] + torch.tensor(
            [0, 1, 2, 3], device=device)

    box_loss = smooth_l1_loss(
        box_regression[pos_inds[:, None], map_inds],
        regression_targets[pos_inds],
        size_average=True,
        beta=1,
    )
    # Scale classification by the guided weighting of both loss magnitudes.
    classification_loss *= self.guided_loss_weighter(box_loss,
                                                     classification_loss)
    return classification_loss, box_loss
def __call__(self, class_logits, box_regression):
    """
    Fast R-CNN losses where proposals labeled -1 (classes not currently
    trained) are excluded from the classification loss.

    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)
    device = class_logits.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    proposals = self._proposals
    labels = cat([proposal.get_field("labels") for proposal in proposals],
                 dim=0)
    regression_targets = cat([
        proposal.get_field("regression_targets") for proposal in proposals
    ], dim=0)

    # Entries with label -1 are ignored (untrained classes are expected to
    # already be remapped to -1 upstream). Dead commented-out debug/remap
    # code from the original was removed.
    classification_loss = F.cross_entropy(class_logits, labels,
                                          ignore_index=-1)

    # Positives and the 4 regression channels of their GT class, used with
    # advanced indexing below.
    sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[sampled_pos_inds_subset]
    if self.cls_agnostic_bbox_reg:
        map_inds = torch.tensor([4, 5, 6, 7], device=device)
    else:
        map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3],
                                                          device=device)

    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds_subset[:, None], map_inds],
        regression_targets[sampled_pos_inds_subset],
        size_average=False,
        beta=1,
    )
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss
def __call__(self, attr_logits, class_logits, box_regression):
    """
    Fast R-CNN losses plus an attribute classification loss.

    The attribute cross-entropy is down-weighted by 0.5 and skips proposals
    whose attribute label is -1.

    Returns:
        (attrs_loss, classification_loss, box_loss)
    """
    # Delegate the standard classification/box losses to the parent class.
    classification_loss, box_loss = \
        super(FastRCNNLossWithAttrComputation,
              self).__call__(class_logits, box_regression)
    attr_logits = cat(attr_logits, dim=0)
    attr_targets = cat(
        [p.get_field("attrs") for p in self._proposals], dim=0)
    attrs_loss = 0.5 * F.cross_entropy(attr_logits, attr_targets,
                                       ignore_index=-1)
    return attrs_loss, classification_loss, box_loss
def __call__(self, proposals, all_mask_logits, targets):
    """
    Mask loss with an optional multiple-instance-learning (MIL) formulation.

    Arguments:
        proposals (list[BoxList]): each carries a "proto_labels" field.
        all_mask_logits (list[Tensor]): mask logits from one or more heads.
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss.
    """
    labels = cat([p.get_field("proto_labels") for p in proposals])
    labels = (labels > 0).long()
    pos_inds = torch.nonzero(labels > 0).squeeze(1)

    if not self.use_mil_loss:
        # Plain BCE mask loss on the first head's logits.
        mask_logits = all_mask_logits[0]
        if self.use_box_mask:
            mask_targets = self.prepare_targets_boxes(proposals)
        else:
            _, mask_targets = self.prepare_targets(proposals, targets)
        mask_targets = cat(mask_targets, dim=0)
        labels_pos = labels[pos_inds]
        # torch.mean inside BCE-with-logits rejects empty tensors.
        if mask_targets.numel() == 0:
            return mask_logits.sum() * 0
        return F.binary_cross_entropy_with_logits(
            mask_logits[pos_inds, labels_pos], mask_targets[pos_inds])

    labels_cr = self.prepare_targets_cr(proposals)
    labels_cr = cat(labels_cr, dim=0)
    mil_losses = []
    for mask_logits in all_mask_logits:
        # Column/row max-pooling of the foreground channel gives the MIL score.
        mil_score = mask_logits[:, 1]
        mil_score = torch.cat(
            [mil_score.max(2)[0], mil_score.max(1)[0]], 1)
        # torch.mean (in binary_cross_entropy_with_logits) doesn't accept
        # empty tensors, so append a graph-connected zero and skip the BCE.
        if mil_score.numel() == 0:
            mil_losses.append(mask_logits.sum() * 0)
            # BUGFIX: the original fell through after appending the
            # placeholder, running BCE on empty tensors and appending a
            # second (NaN) loss for this head.
            continue
        mil_loss = F.binary_cross_entropy_with_logits(
            mil_score[pos_inds], labels_cr[pos_inds])
        mil_losses.append(mil_loss)

    if self.use_aff:
        # Affinity regularizer: penalize local disagreement between each
        # pixel and its 3x3 neighborhood (via the fixed aff_weights kernel).
        mask_logits = all_mask_logits[0]
        mask_logits_n = mask_logits[:, 1:].sigmoid()
        aff_maps = F.conv2d(mask_logits_n, self.aff_weights, padding=(1, 1))
        affinity_loss = mask_logits_n * (aff_maps ** 2)
        affinity_loss = torch.mean(affinity_loss)
        return 1.2 * sum(mil_losses) / len(
            mil_losses) + 0.05 * affinity_loss
    else:
        return sum(mil_losses) / len(mil_losses)
def __call__(self, proposals, keypoint_logits):
    """
    Keypoint head loss (cross-entropy over heatmap locations) with periodic
    debug dumps of the first four predicted heatmaps to disk.

    Arguments:
        proposals (list[BoxList]): each carries a "keypoints" field.
        keypoint_logits (Tensor): [N, K, H, W] predicted heatmaps.

    Returns:
        Tensor: scalar keypoint loss (graph-connected zero if nothing valid).

    Side effects:
        Roughly 1 in 20 calls writes four heatmap images under
        /content/sample_data (Colab-specific path).
    """
    heatmaps = []
    valid = []
    for proposals_per_image in proposals:
        kp = proposals_per_image.get_field("keypoints")
        heatmaps_per_image, valid_per_image = project_keypoints_to_heatmap(
            kp, proposals_per_image, self.discretization_size)
        heatmaps.append(heatmaps_per_image.view(-1))
        valid.append(valid_per_image.view(-1))
    keypoint_targets = cat(heatmaps, dim=0)
    valid = cat(valid, dim=0).to(dtype=torch.uint8)
    valid = torch.nonzero(valid).squeeze(1)
    # torch.mean (in binary_cross_entropy_with_logits) does'nt
    # accept empty tensors, so handle it sepaartely
    if keypoint_targets.numel() == 0 or len(valid) == 0:
        return keypoint_logits.sum() * 0
    N, K, H, W = keypoint_logits.shape
    keypoint_logits = keypoint_logits.view(N * K, H * W)
    keypoint_loss = F.cross_entropy(keypoint_logits[valid],
                                    keypoint_targets[valid])
    # Debug visualization: occasionally dump the first four sigmoid heatmaps.
    input = F.sigmoid(keypoint_logits.view(N * K, H, W))
    rnd = random.randrange(20)
    if rnd % 20 == 0:
        filename = f'{time.time()}'
        save_image(
            input[0, None, :, :],
            os.path.join('/content/sample_data', filename + '_kp0_input.jpg'))
        save_image(
            input[1, None, :, :],
            os.path.join('/content/sample_data', filename + '_kp1_input.jpg'))
        save_image(
            input[2, None, :, :],
            os.path.join('/content/sample_data', filename + '_kp2_input.jpg'))
        save_image(
            input[3, None, :, :],
            os.path.join('/content/sample_data', filename + '_kp3_input.jpg'))
    return keypoint_loss
def __call__(self, proposals, mask_logits, mil_score, targets):
    """
    Compute the column/row MIL mask loss plus a pairwise smoothness term.

    Arguments:
        proposals (list[BoxList])
        mask_logits (Tensor): (N, C, H, W) mask logits
        mil_score (Tensor): per-column/row MIL scores
        targets (list[BoxList])

    Return:
        (mil_loss, pairwise_loss): two scalar tensors (already weighted).
    """
    # MIL term: for multi-class outputs, select the score / logit channel
    # belonging to each proposal's ground-truth class.
    if len(mil_score.shape) > 2 or mask_logits.size(1) > 1:
        class_labels, _ = self.prepare_targets(proposals, targets)
        class_labels = cat(class_labels, dim=0)
        if len(mil_score.shape) > 2:
            mil_score = torch.stack(
                [s[c] for s, c in zip(mil_score, class_labels)])
        if mask_logits.size(1) > 1:
            mask_logits = torch.stack(
                [m[c] for m, c in zip(mask_logits, class_labels)]
            ).unsqueeze(1)

    # Target labels for the per-column/row MIL loss
    # (covers both positive and negative samples).
    labels = self.prepare_targets_cr(proposals, targets)
    labels = cat(labels, dim=0)

    # Compute MIL term for each col/row MIL
    mil_loss = F.binary_cross_entropy_with_logits(mil_score, labels)

    # Pairwise term
    device = mask_logits.device
    pairwise_loss = []
    # Sigmoid transform to [0, 1]
    mask_logits_normalize = mask_logits.sigmoid()
    # PERF FIX: apply each fixed 3x3 kernel directly with F.conv2d instead
    # of constructing a frozen nn.Conv2d module on every call/iteration.
    # Detaching the weights reproduces the old requires_grad=False setup:
    # gradients still flow to the input, never to the kernel.
    for w in self.pairwise_weights_list:
        weights = (self.center_weight - w).detach()
        weights = weights.view(1, 1, 3, 3).to(device)
        aff_map = F.conv2d(mask_logits_normalize, weights, padding=(1, 1))
        cur_loss = torch.mean(aff_map ** 2)
        pairwise_loss.append(cur_loss)
    pairwise_loss = torch.mean(torch.stack(pairwise_loss))

    return 1.0 * mil_loss, 0.05 * pairwise_loss
def convert_to_roi_format(self, boxes):
    """
    Flatten per-image box lists into the (batch_idx, x1, y1, x2, y2) ROI
    layout expected by the ROI pooling ops.

    Arguments:
        boxes (list[BoxList]): one box list per image in the batch

    Return:
        rois (Tensor): (total_boxes, 5) tensor; column 0 is the image index.
    """
    concat_boxes = torch.cat([b.bbox for b in boxes], dim=0)
    device = concat_boxes.device
    dtype = concat_boxes.dtype
    # One index column per image, repeated once per box in that image.
    index_columns = []
    for image_idx, boxes_per_image in enumerate(boxes):
        index_columns.append(
            torch.full((len(boxes_per_image), 1), image_idx,
                       dtype=dtype, device=device))
    ids = torch.cat(index_columns, dim=0)
    return torch.cat([ids, concat_boxes], dim=1)
def forward(self, roi_features, proposals, logger=None):
    """
    Build object and edge context features for the relation head.

    Arguments:
        roi_features (Tensor): pooled per-object features, one row per box
        proposals (list[BoxList]): per-image boxes in 'xyxy' mode; fields
            used: "labels" (when GT labels are in play), "predict_logits",
            and "boxes_per_cls" (sgdet inference only)
        logger: unused here; kept for interface compatibility

    Returns:
        obj_dists (Tensor): per-object class distribution (one-hot in
            predcls mode, logits otherwise)
        obj_preds (Tensor): predicted (or GT) class index per object
        edge_ctx (Tensor): edge context features from the edge encoder
    """
    # labels will be used in DecoderRNN during training
    use_gt_label = self.training or self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL
    obj_labels = cat(
        [proposal.get_field("labels") for proposal in proposals],
        dim=0) if use_gt_label else None

    # label/logits embedding will be used as input
    if self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL:
        obj_embed = self.obj_embed1(obj_labels)
    else:
        obj_logits = cat([
            proposal.get_field("predict_logits") for proposal in proposals
        ], dim=0).detach()
        # Soft embedding: class-probability-weighted mix of label embeddings.
        obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed1.weight

    # bbox embedding will be used as input
    assert proposals[0].mode == 'xyxy'
    pos_embed = self.bbox_embed(encode_box_info(proposals))

    # encode objects with transformer; num_objs records the per-image split
    obj_pre_rep = cat((roi_features, obj_embed, pos_embed), -1)
    num_objs = [len(p) for p in proposals]
    obj_pre_rep = self.lin_obj(obj_pre_rep)
    obj_feats = self.context_obj(obj_pre_rep, num_objs)

    # predict obj_dists and obj_preds
    if self.mode == 'predcls':
        # predcls: ground-truth classes are given; distribution is one-hot.
        obj_preds = obj_labels
        obj_dists = to_onehot(obj_preds, self.num_obj_cls)
        edge_pre_rep = cat(
            (roi_features, obj_feats, self.obj_embed2(obj_labels)), dim=-1)
    else:
        obj_dists = self.out_obj(obj_feats)
        # sgdet inference resolves overlapping per-class boxes with NMS.
        use_decoder_nms = self.mode == 'sgdet' and not self.training
        if use_decoder_nms:
            boxes_per_cls = [
                proposal.get_field('boxes_per_cls') for proposal in proposals
            ]
            obj_preds = self.nms_per_cls(obj_dists, boxes_per_cls, num_objs)
        else:
            # Argmax over foreground classes (column 0 = background),
            # shifted back to absolute class indices.
            obj_preds = obj_dists[:, 1:].max(1)[1] + 1
        edge_pre_rep = cat(
            (roi_features, obj_feats, self.obj_embed2(obj_preds)), dim=-1)

    # edge context
    edge_pre_rep = self.lin_edge(edge_pre_rep)
    edge_ctx = self.context_edge(edge_pre_rep, num_objs)

    return obj_dists, obj_preds, edge_ctx
def __call__(self, proposals, mask_logits, targets):
    """
    Standard Mask R-CNN mask loss: BCE-with-logits between the mask logits
    of each positive proposal's ground-truth class and its mask target.

    Arguments:
        proposals (list[BoxList])
        mask_logits (Tensor): (N, C, H, W) mask logits
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    """
    labels, mask_targets = self.prepare_targets(proposals, targets)

    labels = cat(labels, dim=0)
    mask_targets = cat(mask_targets, dim=0)

    positive_inds = torch.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[positive_inds]

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately.
    # BUG FIX: this guard now runs before the DEBUG visualization below,
    # which indexes mask_targets[0, ...] / mask_logits[positive_inds][0, 1]
    # and crashed when there were no mask targets.
    if mask_targets.numel() == 0:
        return mask_logits.sum() * 0

    if DEBUG:
        # Dump the first target and the first predicted mask as images.
        first_target = (mask_targets[0, ...].data.cpu().numpy() *
                        255).astype(np.uint8)
        first_mask = (
            mask_logits[positive_inds][0, 1].sigmoid().data.cpu().numpy() *
            255).astype(np.uint8)
        first_box = proposals[0].bbox[0]
        first_target = Image.fromarray(first_target)
        first_mask = Image.fromarray(first_mask)
        # NOTE(review): resize uses (x2, y2) as width/height; for 'xyxy'
        # boxes the box size would be (x2 - x1, y2 - y1) — confirm intent.
        first_target = first_target.resize(
            (int(first_box[2]), int(first_box[3])))
        first_mask = first_mask.resize(
            (int(first_box[2]), int(first_box[3])))
        first_target.save('first_target.jpg', 'jpeg')
        first_mask.save('first_mask.jpg', 'jpeg')

    mask_loss = F.binary_cross_entropy_with_logits(
        mask_logits[positive_inds, labels_pos], mask_targets)
    return mask_loss
def __call__(self, class_logits, box_regression):
    """
    Computes the loss for Faster R-CNN.

    This requires that the subsample method has been called beforehand
    (it stores the sampled proposals on self._proposals).

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    class_logits = torch.cat(class_logits, dim=0)
    box_regression = torch.cat(box_regression, dim=0)
    device = class_logits.device

    proposals = self._proposals
    labels = torch.cat(
        [p.get_field("labels") for p in proposals], dim=0)
    regression_targets = torch.cat(
        [p.get_field("regression_targets") for p in proposals], dim=0)

    classification_loss = F.cross_entropy(class_logits, labels)

    # Box regression is computed only for positive samples; advanced
    # indexing picks out the 4 regression channels that belong to each
    # sample's ground-truth class.
    pos_inds = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels[pos_inds]
    map_inds = 4 * pos_labels[:, None] + torch.tensor(
        [0, 1, 2, 3], device=device)

    box_loss = smooth_l1_loss(
        box_regression[pos_inds[:, None], map_inds],
        regression_targets[pos_inds],
        size_average=False,
        beta=1,
    )
    # Normalize by the total number of sampled proposals, not positives.
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss