def forward(self, loc_preds, conf_preds, loc_targets, conf_targets):
    """Compute the combined localization + confidence loss.

    loc_loss comes from ``self.smooth_l1_loss`` applied to the matched
    (positive) boxes only.  conf_loss is a summed cross entropy over the
    matched boxes plus the negatives picked by
    ``self._hard_negative_mining``.

    Args:
        loc_preds (tensor): predicted locations, sized [batch_size, 8732, 4].
        conf_preds (tensor): predicted class confidences,
            sized [batch_size, 8732, num_classes].
        loc_targets (tensor): encoded target locations,
            sized [batch_size, 8732, 4].
        conf_targets (tensor): encoded target classes,
            sized [batch_size, 8732].  Entries > 0 count as matched boxes;
            entries equal to -1 are skipped by the final cross entropy.

    Returns:
        (tensor) ``loc_loss + conf_loss``.  Both terms are divided by the
        number of matched boxes when at least one box matched; otherwise the
        un-normalized sum is returned and nothing is logged.
    """
    # Unpacking requires loc_preds to be 3-D: [N, #boxes, 4].
    _batch, _boxes, _coords = loc_preds.shape

    matched = conf_targets > 0  # [N, 8732], True where a box has a target
    num_matched_boxes = matched.data.float().sum()
    if num_matched_boxes == 0:
        print("No matched boxes")

    # Localization loss: matched boxes only.
    loc_mask = matched.unsqueeze(2).expand_as(loc_preds)  # [N, 8732, 4]
    loc_loss = self.smooth_l1_loss(
        loc_preds[loc_mask].view(-1, 4),    # [#pos, 4]
        loc_targets[loc_mask].view(-1, 4),  # [#pos, 4]
    )

    # Confidence loss, step 1: a per-box loss over every box is handed to
    # the hard negative miner together with the positive mask.
    per_box_loss = self._cross_entropy_loss(
        conf_preds.view(-1, self.num_classes),
        conf_targets.view(-1),
    )  # [N * 8732]
    mined = self._hard_negative_mining(per_box_loss, matched)  # [N, 8732]

    # Step 2: keep the union of matched and mined boxes.
    selected = (matched + mined).gt(0)                          # [N, 8732]
    conf_mask = selected.unsqueeze(2).expand_as(conf_preds)     # [N, 8732, C]
    kept_preds = conf_preds[conf_mask].view(-1, self.num_classes)
    kept_targets = conf_targets[selected]
    conf_loss = F.cross_entropy(kept_preds, kept_targets,
                                reduction='sum', ignore_index=-1)

    # Without any matched box there is nothing to normalize by.
    if not num_matched_boxes > 0:
        return conf_loss + loc_loss

    loc_loss = loc_loss / num_matched_boxes
    conf_loss = conf_loss / num_matched_boxes
    Log.debug("loc_loss: %f, cls_loss: %f" % (float(loc_loss.item()), float(conf_loss.item())))
    return loc_loss + conf_loss
def forward(self, loc_preds, cls_preds, loc_targets, cls_targets):
    """Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).

    Args:
        loc_preds (tensor): predicted locations,
            sized [batch_size, #anchors, 4].
        cls_preds (tensor): predicted class confidences,
            sized [batch_size, #anchors, #classes].
        loc_targets (tensor): encoded target locations,
            sized [batch_size, #anchors, 4].
        cls_targets (tensor): encoded target labels,
            sized [batch_size, #anchors].  Labels > 0 are positives; labels
            below 0 are excluded from the classification loss.

    Returns:
        (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) / num_pos
                      + FocalLoss(cls_preds, cls_targets) / num_pos,
        where num_pos is the number of positive anchors, floored at 1.
    """
    pos = cls_targets > 0  # [N,#anchors]

    # Normalizer as a plain Python float, floored at 1 so a batch without
    # positives does not divide by zero.
    num_pos = max(1.0, float(pos.long().sum().item()))

    # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
    loc_mask = pos.unsqueeze(2).expand_as(loc_preds)       # [N,#anchors,4]
    masked_loc_preds = loc_preds[loc_mask].view(-1, 4)      # [#pos,4]
    masked_loc_targets = loc_targets[loc_mask].view(-1, 4)  # [#pos,4]
    loc_loss = F.smooth_l1_loss(masked_loc_preds, masked_loc_targets, reduction='sum')

    # cls_loss = FocalLoss(cls_preds, cls_targets) over non-ignored anchors
    pos_neg = cls_targets > -1  # exclude ignored anchors
    cls_mask = pos_neg.unsqueeze(2).expand_as(cls_preds)
    masked_cls_preds = cls_preds[cls_mask].view(-1, self.num_classes)
    cls_loss = self.focal_loss(masked_cls_preds, cls_targets[pos_neg])

    loc_loss = loc_loss / num_pos
    cls_loss = cls_loss / num_pos

    # `.item()` rather than `.data[0]`: the summed losses are 0-dim tensors,
    # and indexing a 0-dim tensor raises IndexError.
    Log.debug('loc_loss: %.3f | cls_loss: %.3f' % (loc_loss.item(), cls_loss.item()))
    return loc_loss + cls_loss