def forward(self, pred, prior_box, targets):
    """SSD MultiBox loss.

    Computes Smooth-L1 localization loss over positive priors and
    hard-negative-mined cross-entropy confidence loss, both normalized
    by the number of positives.

    Args:
        pred: tuple (loc_data, conf_data) —
            loc_data: (batch, num_priors, 4) box regression predictions,
            conf_data: (batch, num_priors, num_classes) class logits.
        prior_box: (num_priors, 4) default boxes.
        targets: per-image tensors of [x1, y1, x2, y2, label] rows.

    Returns:
        (loss_l, loss_c): localization and confidence loss scalars.
    """
    loc_data, conf_data = pred
    batch_num = loc_data.size(0)
    prior_box_num = prior_box.size(0)

    # Match priors (default boxes) and ground-truth boxes; `match` fills
    # loc_t / conf_t in place for image `idx`.
    loc_t = torch.Tensor(batch_num, prior_box_num, 4)
    conf_t = torch.LongTensor(batch_num, prior_box_num)
    for idx in range(batch_num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = prior_box.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    # NOTE(review): unconditional .cuda() — this loss requires a GPU host.
    loc_t = loc_t.cuda()
    conf_t = conf_t.cuda()

    # wrap targets (Variable is a no-op wrapper in modern PyTorch)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0  # priors matched to a foreground class

    # Localization Loss (Smooth L1) over positives only.
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # FIX: `size_average=False` is deprecated; `reduction='sum'` is the
    # exact modern replacement.
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior softmax-CE surrogate used to rank negatives:
    # log_sum_exp(logits) - logit_of_target.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

    # Hard Negative Mining: zero out positives, then rank by loss via the
    # double-argsort trick (idx_rank = each prior's rank, descending).
    loss_c[pos.view(-1, 1)] = 0
    loss_c = loss_c.view(batch_num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives + mined negatives.
    # FIX: `+` on bool tensors followed by .gt(0) is deprecated; logical
    # `|` is the equivalent, supported form.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[pos_idx | neg_idx].view(-1, self.num_classes)
    targets_weighted = conf_t[pos | neg]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g)) / N, N ≥ 1.
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predict, truth, weight=None):
    """Classification loss with optional focal modulation, label
    smoothing, and mixup sample weighting.

    Args:
        predict: (N, num_classes) logits.
        truth: (N,) class indices, or a smoothed (N, num_classes)
            distribution when ``self.label_smooth`` is set.
        weight: per-sample mixup weights, required when ``self.mixup``.

    Returns:
        Scalar loss — mean if ``self.size_average``, else sum.
    """
    n = predict.shape[0]
    if self.mixup:
        assert n == truth.shape[0] == weight.shape[0]
    else:
        assert n == truth.shape[0]

    if self.softmax_focal:
        # OHEM + focal loss on top of softmax cross-entropy.
        soft_score = focal_sum_exp(predict)
        modulator = self.alpha * (1 - soft_score) ** self.gamma
        target_col = truth.view(-1, 1)
        ce = log_sum_exp(predict) - predict.gather(1, target_col)
        per_sample = ce * modulator.gather(1, target_col)
    elif self.label_smooth:
        # Here `truth` is a soft distribution, not hard indices.
        per_sample = (log_sum_exp(predict, label_smooth=True) * truth).sum(
            1, keepdim=True)
    else:
        # Plain softmax CE: log_sum_exp(logits) - logit_of_target.
        per_sample = log_sum_exp(predict) - predict.gather(
            1, truth.view(-1, 1))

    if self.mixup:
        per_sample = per_sample * weight

    return per_sample.mean() if self.size_average else per_sample.sum()
def run(self, bbox_regressions_list, classifications_list, priors, all_priors, targets):
    """Build per-feature-level classification/localization sampling masks.

    For each level: match priors to ground truth, rank background priors
    by their classification loss (hard-negative mining), keep at most
    ``negpos_ratio`` * (#positives) negatives, then collapse anchor-level
    picks into per-location masks.

    Returns:
        (loc_mask_list, cls_mask_list): detached bool mask tensors, one
        per feature level.
    """
    cls_mask_list = []
    loc_mask_list = []
    for i in range(len(bbox_regressions_list)):
        prior = priors[i].cuda()
        bbox_regression = bbox_regressions_list[i]
        conf_data = classifications_list[i]
        num = bbox_regression.size(0)
        num_priors = int(prior.size(0))
        # mask_t[idx, p]: matched label for prior p of image idx,
        # filled in place by `match`.
        mask_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :4].data
            defaults = prior.data
            match(truths, defaults, all_priors.cuda().data, mask_t, idx)
        if GPU:
            mask_t = mask_t.cuda()
        # Compute max conf across batch for hard negative mining:
        # per-prior softmax-CE surrogate = log_sum_exp - target logit.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, mask_t.view(-1, 1))
        zeros = torch.tensor(0).cuda()
        pos = mask_t > zeros
        # Hard Negative Mining: zero out positives, then the double
        # argsort gives each prior's rank by descending loss.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos,
                              max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        # Collapse anchor-level picks to spatial locations: keep a
        # location if any of its num_anchors anchors was selected.
        cls_mask = torch.sum(
            (pos + neg).view(num, -1, self.num_anchors), 2) > 0
        loc_mask = torch.sum(pos.view(num, -1, self.num_anchors), 2) > 0
        cls_mask_list.append(cls_mask.detach())
        loc_mask_list.append(loc_mask.detach())
    return loc_mask_list, cls_mask_list
def forward(self, predictions, targets):
    """SSD MultiBox loss: Smooth-L1 localization + hard-negative-mined
    cross-entropy confidence, both normalized by the positive count.

    Args:
        predictions: tuple (loc_data, conf_data).
        targets: per-image tensors of [x1, y1, x2, y2, label] rows.

    Returns:
        (loss_l, loss_c) loss scalars.
    """
    loc_data, conf_data = predictions
    num_images = loc_data.size(0)
    num_priors = (self.priors.size(0))
    loc_t = torch.Tensor(num_images, num_priors, 4)
    conf_t = torch.LongTensor(num_images, num_priors)
    priors = self.priors.data
    # Match each image's ground truth to the prior boxes.
    for idx in range(num_images):
        gt_boxes = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        loc, conf = boxlib.match(gt_boxes, priors, labels,
                                 self.args.overlap_th, self.args.variance)
        loc_t[idx], conf_t[idx] = loc, conf
    if self.args.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()

    # calculate location loss over positive priors only
    pos = conf_t > 0
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)  # (n_images, n_priors, 4)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')  # (cx, cy, w, h)

    # hard negative mining
    batch_conf = conf_data.view(-1, self.args.num_classes)  # conf_data.shape = (n_images, n_priors, 2)
    loss_c = boxlib.log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))  # calculate the -log(softmax()) = - x_y + log_sum_exp
    loss_c[pos.view(-1, 1)] = 0  # exclude positives from the negative ranking
    loss_c = loss_c.view(num_images, -1)
    # Double argsort: idx_rank = each prior's rank by descending loss.
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.args.np_ratio * num_pos, max=pos.size(1)-1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives + mined negatives.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    scores = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.args.num_classes)
    np_gty = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(scores, np_gty, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + α * Lloc(x,l,g)) / N
    # (N clamped to 1 to avoid division by zero on all-background batches)
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """Multibox Loss with landmark regression (RetinaFace-style).

    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
        and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).

    Returns:
        (loss_l, loss_c, loss_lm): localization, confidence, and
        combined landmark (eye + "mafa" points) losses.
    """
    #import pdb; pdb.set_trace()
    loc_data, conf_data, landm_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))

    # match priors (default boxes) and ground truth boxes;
    # targets layout is [x1,y1,x2,y2, 10 landmark coords, label].
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :4].data
        labels = targets[idx][:, -1].data
        landms = targets[idx][:, 4:14].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              landms, loc_t, conf_t, landm_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        landm_t = landm_t.cuda()

    zeros = torch.tensor(0).cuda()
    # landm Loss (Smooth L1)
    # Shape: [batch,num_priors,10]
    # pos1: priors matched with label > 0; only the first 4 of the 10
    # landmark coords (presumably the eye points — TODO confirm) are
    # supervised here via wing loss.
    pos1 = conf_t > zeros
    num_pos_landm = pos1.long().sum(1, keepdim=True)
    N1 = max(num_pos_landm.data.sum().float(), 1)
    pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
    #import pdb
    #pdb.set_trace()
    landm_p = landm_data[pos_idx1].view(-1, 10)[:,:4]
    landm_t1 = landm_t[pos_idx1].view(-1, 10)[:,:4]
    #s1 = torch.ones(1,2)
    #s2 = torch.ones(1,4)*3
    #s = torch.cat([s1,s2],dim=-1).cuda()
    loss_landm = self.wingloss(landm_p, landm_t1)
    #loss_landm = F.smooth_l1_loss(landm_p, landm_t1, reduction='sum')

    # Second landmark branch: priors whose label == 1 (presumably the
    # MAFA/masked-face class — TODO confirm) supervise the remaining 6
    # coords, scaled by s = [1,1,3,3,3,3] to up-weight the last 4.
    one = torch.tensor(1).cuda()
    pos_mafa = conf_t == one
    num_pos_landm2 = pos_mafa.long().sum(1, keepdim=True)
    N2 = max(num_pos_landm2.data.sum().float(), 1)
    pos_idx2 = pos_mafa.unsqueeze(pos_mafa.dim()).expand_as(landm_data)
    landm_p_mafa = landm_data[pos_idx2].view(-1, 10)[:,4:]
    landm_t1_mafa = landm_t[pos_idx2].view(-1, 10)[:,4:]
    s1 = torch.ones(1,2)
    s2 = torch.ones(1,4)*3
    s = torch.cat([s1,s2],dim=-1).cuda()
    loss_landm_mafa = self.wingloss(landm_p_mafa*s,landm_t1_mafa*s)
    #loss_landm_mafa = F.smooth_l1_loss(landm_p_mafa*s,landm_t1_mafa*s,reduction='sum')
    #loss_landm = self.wingloss(landm_p*s, landm_t*s)
    #loss_landm = self.adaptivewingloss(landm_p, landm_t)

    # Collapse all non-background labels to the single face class for
    # the loc/conf losses (mutates conf_t in place).
    pos = conf_t != zeros
    conf_t[pos] = 1

    # eye landmark loss
    #pos2 = pos.unsqueeze(pos.dim()).expand_as(landm_data)
    #lm_eye_p = landm_data[pos2].view(-1, 10)[:,:4]
    #lm_eye_t = landm_t[pos2].view(-1, 10)[:,:4]
    #loss_landm_eye = F.smooth_l1_loss(lm_eye_p, lm_eye_t, reduction='sum')

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

    # Hard Negative Mining (double argsort gives per-prior loss rank)
    loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
    targets_weighted = conf_t[(pos+neg).gt(0)]
    one = torch.tensor(1).cuda()
    #import pdb
    #pdb.set_trace()
    # Margin trick: subtract 0.1 from the positive-class logit of true
    # positives before CE, making the classifier work harder on them.
    tmp = torch.where(targets_weighted==one,torch.tensor(0.1).cuda(),torch.tensor(0.0).cuda())
    conf_p[:,1] = conf_p[:,1] - tmp
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    loss_landm /= N1
    loss_landm_mafa /= N2
    loss_lm = loss_landm + loss_landm_mafa
    return loss_l, loss_c, loss_lm
def forward(self, predictions, targets, big_ssd_preds=None, distill_mask=None):
    """SSD MultiBox loss with best-prior up-weighting and optional
    knowledge distillation from a larger teacher SSD.

    Args:
        predictions: (loc_data, conf_data, priors) from the student net.
        targets: per-image tensors of [x1, y1, x2, y2, label] rows.
        big_ssd_preds: teacher (loc, conf, priors); required when
            ``self._enable_distill``.
        distill_mask: prior-index mask aligning teacher priors to the
            student's (the nets may predict over different prior sets).

    Returns:
        (loss_l, loss_c) — plus (loss_c_distill, loss_l_distill) when
        distillation is enabled.
    """
    loc_data, conf_data, priors = predictions
    if self._enable_distill:
        assert big_ssd_preds is not None
    num = loc_data.size(0)
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))

    # match priors (default boxes) and ground truth boxes; `match` also
    # returns a mask of each ground truth's single best prior.
    loc_t = torch.zeros(num, num_priors, 4)
    conf_t = torch.zeros(num, num_priors, dtype=torch.long)
    best_priors_msk = []
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        if self.use_half:
            truths = truths.half()
        defaults = priors.data
        pmsk = match(self.threshold, truths, defaults, self.variance,
                     labels, loc_t, conf_t, idx)
        best_priors_msk.append(pmsk)
    best_priors_msk = torch.stack(best_priors_msk)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets
    loc_t.requires_grad = False
    conf_t.requires_grad = False

    pos = conf_t > 0
    # Sanity check: every best prior must also be a positive.
    assert (pos & best_priors_msk == best_priors_msk).min().item() == 1

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    # Best priors are weighted by self.bpw; the rest get weight 1.
    loc_p = loc_data[pos].view(-1, 4)
    loc_t = loc_t[pos].view(-1, 4)
    pos_idx_l = pos
    msk = best_priors_msk[pos]
    loss_l = F.smooth_l1_loss(loc_p[~msk], loc_t[~msk], reduction='sum')
    loss_l += self.bpw * F.smooth_l1_loss(loc_p[msk], loc_t[msk],
                                          reduction='sum')

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

    # Hard Negative Mining: zero out positives, rank the rest by loss
    # via double argsort, keep the top negpos_ratio * #pos negatives.
    loss_c = loss_c.view(pos.size(0), pos.size(1))
    loss_c[pos] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples,
    # again with bpw weighting on the best priors.
    chosen_idx = pos | neg
    conf_p = conf_data[chosen_idx].view(-1, self.num_classes)
    targets_weighted = conf_t[pos | neg]
    msk = best_priors_msk[pos | neg]
    loss_c = F.cross_entropy(conf_p[~msk], targets_weighted[~msk],
                             reduction='sum')
    loss_c += self.bpw * F.cross_entropy(conf_p[msk], targets_weighted[msk],
                                         reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    # Effective N accounts for the extra weight on best priors.
    N = pos.sum() + best_priors_msk.sum() * (self.bpw - 1.)
    loss_l /= N
    loss_c /= N

    if self._enable_distill:
        big_loc_data, big_conf_data, _ = big_ssd_preds
        inv_temperature = 1 / 1.
        # The big and small nets have different prior-box counts; the
        # vgg-lite small net has no 38x38 detection branch.
        if distill_mask is not None:
            big_loc_data = big_loc_data[:, distill_mask]
            big_conf_data = big_conf_data[:, distill_mask]
        big_conf_p = big_conf_data[chosen_idx].view(-1, self.num_classes)
        # Soft-target CE against the teacher's softmax distribution.
        y_softmax = F.log_softmax(conf_p * inv_temperature, dim=1)
        y_big_softmax = F.softmax(big_conf_p, dim=1)
        big_loc_p = big_loc_data[pos_idx_l].view(-1, 4)
        # same as loss_c and loss_l
        loss_c_distill = -(y_big_softmax * y_softmax).sum(dim=1).sum()
        loss_l_distill = F.smooth_l1_loss(loc_p, big_loc_p, reduction='sum')
        loss_c_distill /= N
        loss_l_distill /= N
        return loss_l, loss_c, loss_c_distill, loss_l_distill
    return loss_l, loss_c
def forward(self, predictions, targets):
    """SSD MultiBox loss with a minimum-negatives fallback.

    Computes Smooth-L1 localization loss over positive priors and
    hard-negative-mined cross-entropy confidence loss. When an image has
    no positives, ``self.negatives_for_hard`` negatives are kept anyway.

    Returns:
        (loss_l, loss_c) loss scalars.
    """
    #--------------------------------------------------#
    #   Unpack the three predictions: box regressions,
    #   class confidences, and prior (default) boxes.
    #--------------------------------------------------#
    loc_data, conf_data, priors = predictions
    #--------------------------------------------------#
    #   Batch size and number of priors.
    #--------------------------------------------------#
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    #--------------------------------------------------#
    #   Allocate the matching targets.
    #--------------------------------------------------#
    loc_t = torch.zeros(num, num_priors, 4).type(torch.FloatTensor)
    conf_t = torch.zeros(num, num_priors).long()
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        priors = priors.cuda()
    for idx in range(num):
        # Ground-truth boxes and labels for this image.
        truths = targets[idx][:, :-1]
        labels = targets[idx][:, -1]
        if (len(truths) == 0):
            continue
        # Prior (default) boxes.
        defaults = priors
        #--------------------------------------------------#
        #   Match ground truth to priors: a prior with high
        #   overlap against a ground-truth box is assigned
        #   to detect it (fills loc_t/conf_t in place).
        #--------------------------------------------------#
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    #--------------------------------------------------#
    #   Wrap as Variables (no-op in modern PyTorch).
    #   loc_t   (num, num_priors, 4)
    #   conf_t  (num, num_priors)
    #--------------------------------------------------#
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    # Priors with conf_t > 0 contain an object.
    pos = conf_t > 0
    #--------------------------------------------------#
    #   Count positives per image.
    #   num_pos (num, )
    #--------------------------------------------------#
    num_pos = pos.sum(dim=1, keepdim=True)
    #--------------------------------------------------#
    #   Gather all positives and compute the box loss.
    #   pos_idx (num, num_priors, 4)
    #--------------------------------------------------#
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # NOTE(review): size_average=False is deprecated; the modern
    # equivalent is reduction='sum'.
    loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
    #--------------------------------------------------#
    #   batch_conf  (num * num_priors, num_classes)
    #   loss_c      (num, num_priors)
    #--------------------------------------------------#
    batch_conf = conf_data.view(-1, self.num_classes)
    # Rank priors by how hard they are to classify
    # (softmax-CE surrogate: log_sum_exp - target logit).
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    loss_c = loss_c.view(num, -1)
    # Exclude positives: mining considers hard negatives only.
    loss_c[pos] = 0
    #--------------------------------------------------#
    #   Double argsort: idx_rank is each prior's rank by
    #   descending loss.
    #   loss_idx    (num, num_priors)
    #   idx_rank    (num, num_priors)
    #--------------------------------------------------#
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    #--------------------------------------------------#
    #   Recount positives and cap the negative count.
    #   num_pos (num, )
    #   neg     (num, num_priors)
    #--------------------------------------------------#
    num_pos = pos.long().sum(1, keepdim=True)
    # Cap negatives at negpos_ratio * positives.
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    # Images with zero positives still contribute a fixed number
    # of hard negatives.
    num_neg[num_neg.eq(0)] = self.negatives_for_hard
    neg = idx_rank < num_neg.expand_as(idx_rank)
    #--------------------------------------------------#
    #   Select positives + mined negatives for the
    #   confidence loss.
    #   pos_idx (num, num_priors, num_classes)
    #   neg_idx (num, num_priors, num_classes)
    #--------------------------------------------------#
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

    # Normalize by the positive count, clamped to at least 1.
    N = torch.max(num_pos.data.sum(), torch.ones_like(num_pos.data.sum()))
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets, conf_target_stand_dist):
    """Multibox Loss with an auxiliary binary (object/background) head
    and a distribution-alignment ("dist") loss term.

    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
        and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            bin_conf: torch.size(batch_size,num_priors,2)
            priors shape: torch.size(num_priors,4)

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).

    Returns:
        (loss_l, loss_c, loss_bin, dist_loss, pos, neg_binary, neg_multi)
    """
    loc_data, conf_data, bin_conf_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes; fills three
    # targets: box offsets, multi-class labels, and binary labels.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    bin_conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        match_3_terms(self.threshold, truths, defaults, self.variance,
                      labels, loc_t, conf_t, bin_conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        bin_conf_t = bin_conf_t.cuda()
    # wrap targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)
    bin_conf_t = Variable(bin_conf_t, requires_grad=False)

    # Positives per the binary head; conf_t < 0 marks "fake positives"
    # (presumably ignore-region matches — TODO confirm) that are reset
    # to background for the multi-class loss.
    pos = bin_conf_t > 0
    fake_pos = pos + (conf_t < 0)
    conf_t[conf_t < 0] = 0

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Compute max binary_conf across batch for hard negative mining
    batch_bin_conf = bin_conf_data.view(-1, 2)
    loss_bin = log_sum_exp(batch_bin_conf) - batch_bin_conf.gather(
        1, bin_conf_t.view(-1, 1))
    # Hard Negative Mining (double argsort gives per-prior loss rank)
    loss_bin[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    #loss_bin[fake_pos.view(-1, 1)] = 0  # filter out fake pos boxes for now
    loss_bin = loss_bin.view(num, -1)
    _, loss_idx = loss_bin.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    neg_binary = neg
    '''
    loss_bin = loss_bin.view(-1,1)
    loss_bin[fake_pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_bin = loss_bin.view(num, -1)
    _, loss_idx = loss_bin.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    # neg_binary = neg
    '''
    # Binary confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(bin_conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(bin_conf_data)
    bin_conf_p = bin_conf_data[(pos_idx + neg_idx).gt(0)].view(-1, 2)
    targets_weighted = bin_conf_t[(pos + neg).gt(0)]
    loss_bin = F.cross_entropy(bin_conf_p, targets_weighted,
                               reduction='sum')

    # Compute max conf across batch for hard negative mining
    # Confidence Loss Including Positive and Negative Examples
    # the version 0922 that I use the neg samples from binary
    # classification as the neg for multi-classification
    '''
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
    targets_weighted = conf_t[(pos+neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
    '''
    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    #########################################################################
    # used fake_pos or not
    # Hard Negative Mining (separate mining pass for the multi-class head)
    #loss_c[fake_pos.view(-1, 1)] = 0  # filter out fake pos boxes for now
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    neg_multi = neg
    '''
    loss_c = loss_c.view(-1, 1)
    loss_c[fake_pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    #neg_multi = neg
    '''
    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    #soft_label_val =smooth_label(targets_weighted, self.num_classes, eps=0.001)
    #loss_c = crossentropy(conf_p, soft_label_val)
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # for dist loss: reward mass on the true class for positives,
    # penalize (at 1/5 weight) background mass for mined negatives.
    conf_t_new = conf_t.view(-1)
    nonzero_index = torch.nonzero(conf_t_new)
    conf_t_new = conf_t_new[nonzero_index]
    conf_target_stand_dist = conf_target_stand_dist.view(
        -1, self.num_classes)
    pos_conf_target_stand_dist = conf_target_stand_dist[
        nonzero_index].squeeze(1)
    pos_dist = torch.gather(pos_conf_target_stand_dist, dim=1,
                            index=conf_t_new)
    neg_nonzero_index = torch.nonzero(neg.view(-1))
    neg_conf_target_stand_dist = conf_target_stand_dist[
        neg_nonzero_index].squeeze(1)
    neg_dist = neg_conf_target_stand_dist[:, 0]
    dist_loss = -pos_dist.sum() + 1 / 5 * neg_dist.sum()  # weight
    '''
    conf_p_t = conf_data[(pos_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos).gt(0)]
    loss_c = F.cross_entropy(conf_p_t, targets_weighted, reduction='sum')
    conf_p_f = conf_data[(neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(neg).gt(0)]
    loss_c = loss_c + F.cross_entropy(conf_p_f, targets_weighted, reduction='sum') * 0.2
    '''
    # Sum of losses:
    # L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g) + \beta Lbinconf(x, c)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    loss_bin /= N
    dist_loss /= N
    return loss_l, loss_c, loss_bin, dist_loss, pos, neg_binary, neg_multi
def forward(self, predictions, targets):
    """SSD MultiBox loss.

    Computes Smooth-L1 localization loss over positive priors and
    hard-negative-mined cross-entropy confidence loss, both normalized
    by the number of positives.

    Args:
        predictions: (loc_data, conf_data, priors) —
            loc_data: (batch, num_priors, 4) box regressions,
            conf_data: (batch, num_priors, num_classes) class logits,
            priors: default boxes.
        targets: per-image tensors of [x1, y1, x2, y2, label] rows.

    Returns:
        (loss_l, loss_c): localization and confidence loss scalars.
    """
    # Box regressions, class confidences, prior boxes.
    loc_data, conf_data, priors = predictions
    # Batch size.
    num = loc_data.size(0)
    # Keep only the priors the predictions actually cover.
    priors = priors[:loc_data.size(1), :]
    # Number of priors.
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # Matching targets, filled in place by `match` per image.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        # Ground-truth boxes, labels, and the default boxes.
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        # Assign each prior the ground truth it should detect.
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # Wrap as Variables (no-op in modern PyTorch, kept for compatibility).
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    # Priors with conf_t > 0 contain an object.
    pos = conf_t > 0
    # Positives per image.
    num_pos = pos.sum(dim=1, keepdim=True)

    # Localization loss (Smooth L1) over positives only.
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # FIX: `size_average=False` is deprecated; `reduction='sum'` is the
    # exact replacement.
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior softmax-CE surrogate used to rank hard negatives:
    # log_sum_exp(logits) - logit_of_target.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    loss_c = loss_c.view(num, -1)
    # Exclude positives from the negative ranking.
    loss_c[pos] = 0
    # Double argsort: idx_rank = each prior's rank by descending loss.
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    # Recount positives and cap negatives at negpos_ratio * positives.
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives + mined negatives.
    # FIX: bool-tensor `+` followed by .gt(0) is deprecated; logical `|`
    # is the supported equivalent.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[pos_idx | neg_idx].view(-1, self.num_classes)
    targets_weighted = conf_t[pos | neg]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N.
    # FIX: clamp N to at least 1 — an all-background batch previously
    # divided by zero (sibling implementations in this file guard this).
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets, teacher_data):
    """Multibox Loss with teacher distillation (RetinaFace-style,
    including landmark regression).

    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
        and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
        teacher_data: (loc_teacher, conf_teacher) predictions from a
            larger teacher network, aligned to the same priors.

    Returns:
        (loss_l, loss_c, loss_landm)
    """
    loc_teacher, conf_teacher = teacher_data
    loc_data, conf_data, landm_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))

    # match priors (default boxes) and ground truth boxes;
    # targets layout is [x1,y1,x2,y2, 10 landmark coords, label].
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :4].data
        labels = targets[idx][:, -1].data
        landms = targets[idx][:, 4:14].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              landms, loc_t, conf_t, landm_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        landm_t = landm_t.cuda()

    zeros = torch.tensor(0).cuda()
    # landm Loss (Smooth L1)
    # Shape: [batch,num_priors,10]
    pos1 = conf_t > zeros
    num_pos_landm = pos1.long().sum(1, keepdim=True)
    N1 = max(num_pos_landm.data.sum().float(), 1)
    pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
    landm_p = landm_data[pos_idx1].view(-1, 10)
    landm_t = landm_t[pos_idx1].view(-1, 10)
    loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

    # Collapse all non-background labels to the single face class
    # (mutates conf_t in place).
    pos = conf_t != zeros
    conf_t[pos] = 1

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loc_t2 = loc_teacher[pos_idx].view(-1, 4)
    # Teacher-bounded regression: only add the MSE term when the student
    # is worse (in MSE) than the teacher against the ground truth.
    loss_fn = torch.nn.MSELoss(reduction='sum')
    loss1 = loss_fn(loc_p, loc_t)
    loss2 = loss_fn(loc_t2, loc_t)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
    if loss1 > loss2:
        loss_l = loss_l + self.r_weight * loss1

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining (double argsort gives per-prior loss rank)
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples,
    # plus a soft (BCE-on-softmax) distillation term against the teacher.
    m = nn.Softmax(dim=1)
    loss_fn2 = nn.BCEWithLogitsLoss(reduction='sum')
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    conf_t2 = conf_teacher[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_soft = loss_fn2(m(conf_p), m(conf_t2))
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
    loss_c = loss_c + self.c_weight * loss_soft
    # import ipdb;
    # ipdb.set_trace()

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    loss_landm /= N1
    return loss_l, loss_c, loss_landm
def forward(self, predictions, priors, targets, pass_index=None):
    """Multibox Loss with an Early-Reject (ER) pass-index mechanism.

    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
        and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)
            pre_conf is used to use Early reject or not

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
        pass_index: optional bool mask of priors that survived an
            earlier rejection stage; when given, the localization loss
            is restricted to surviving priors.

    Returns:
        (loss_l, loss_c, index) where `index` is the keep-mask for the
        next stage (None when pass_index was supplied).
    """
    loc_data, conf_data = predictions
    priors = priors
    num = loc_data.size(0)
    # priors may be shared (2-D) or per-image (3-D).
    num_priors = (priors.size(0)) if priors.dim() == 2 else priors.size(1)
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        if priors.dim() == 3:
            defaults = priors.data[idx, :, :]
        else:
            defaults = priors.data
        # if pass_index is not None:
        #     defaults = defaults[pass_index_data[idx].unsqueeze(1).expand_as(defaults)].view(-1, num)
        #     if defaults.shape[0] != 6375:
        #         print('ERROR')
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets
    if pass_index is not None:
        # Restrict targets and predictions to priors that passed the
        # earlier rejection stage.
        pass_index_data = pass_index.data
        loc_t = loc_t[pass_index_data.unsqueeze(2).expand_as(loc_t)].view(
            -1, 4)
        conf_t1 = conf_t[pass_index_data]
        loc_data = loc_data[pass_index_data.unsqueeze(2).expand_as(
            loc_data)].view(-1, 4)
        print(conf_t1.shape[0] / num)
        # conf_data1 = conf_data[pass_index_data.unsqueeze(2).expand_as(conf_data)].view(-1, self.num_classes)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    # NOTE(review): size_average=False is deprecated; the modern
    # equivalent is reduction='sum'.
    if pass_index is not None:
        conf_t1 = Variable(conf_t1, requires_grad=False)
        pos = conf_t1 > 0
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
    else:
        pos = conf_t > 0
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

    # Confidence loss is always computed over the full prior set.
    pos = conf_t > 0
    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # ER: priors whose background softmax prob < 0.99 are kept for the
    # next stage (computed with a max-shift for numerical stability).
    if pass_index is None:
        x_max = batch_conf.data.max()
        temp = torch.exp(batch_conf[:, 0] - x_max) / torch.sum(
            torch.exp(batch_conf - x_max), 1)
        # print(temp.data.max())
        temp = temp < 0.99
        temp_idx = temp.view(num, -1)

    # Hard Negative Mining (double argsort gives per-prior loss rank)
    loss_c[pos.view(-1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    if pass_index is not None:
        # NOTE(review): `1 - pass_index_data` on a bool tensor is
        # deprecated/removed in modern PyTorch — `~pass_index_data` is
        # the supported equivalent; confirm tensor dtype before relying
        # on this line.
        loss_c[1 - pass_index_data] = 0
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    # NOTE(review): N is not clamped — an all-background batch divides
    # by zero here; sibling implementations guard with max(N, 1).
    N = num_pos.data.sum()
    loss_l /= N
    loss_c /= N
    # Keep-mask for the next stage: positives plus non-confident priors.
    if pass_index is None:
        index = (pos + temp_idx).gt(0)
    else:
        index = None
    return loss_l, loss_c, index
# wrap targets loc_t = Variable(loc_t, requires_grad=False) conf_t = Variable(conf_t,requires_grad=False) pos = conf_t > 0 # Localization Loss (Smooth L1) # Shape: [batch,num_priors,4] pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) loc_p = loc_data[pos_idx].view(-1,4) loc_t = loc_t[pos_idx].view(-1,4) loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False) # Compute max conf across batch for hard negative mining batch_conf = conf_data.view(-1,self.num_classes) loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1)) # Hard Negative Mining loss_c[pos.view(-1)] = 0 # filter out pos boxes for now loss_c = loss_c.view(num, -1) _,loss_idx = loss_c.sort(1, descending=True) _,idx_rank = loss_idx.sort(1) num_pos = pos.long().sum(1,keepdim=True) num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) neg = idx_rank < num_neg.expand_as(idx_rank) # Confidence Loss Including Positive and Negative Examples pos_idx = pos.unsqueeze(2).expand_as(conf_data) neg_idx = neg.unsqueeze(2).expand_as(conf_data) conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) targets_weighted = conf_t[(pos+neg).gt(0)]
def forward(self, predictions, priors, targets):
    """Multibox loss with an extra 5-point (10-coordinate) landmark term.

    Args:
        predictions (tuple): (loc preds, conf preds, 5-point loc preds) from the net.
            conf shape: torch.Size(batch_size, num_priors, num_classes)
            loc shape: torch.Size(batch_size, num_priors, 4)
            loc_five shape: torch.Size(batch_size, num_priors, 10)
        priors: prior (default) boxes, shape torch.Size(num_priors, 4)
        targets (tensor): ground truth per image; layout assumed to be
            [x1, y1, x2, y2, label, 10 landmark coords] — TODO confirm against loader.

    Returns:
        (loss_l, loss_c, loss_coords): localization, confidence and
        landmark-coordinate losses, each normalized by the positive count.
    """
    loc_data, conf_data, loc_five_data = predictions
    priors = priors  # no-op self-assignment kept from original
    num = loc_data.size(0)  # batch size
    num_priors = (priors.size(0))  # total number of anchors
    # Buffers filled in-place by match_(): encoded loc targets, class labels,
    # and 10-coordinate landmark targets per prior.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(
        num, num_priors)  # class label for each prior after matching
    loc_five = torch.Tensor(num, num_priors, 10)  # 5 landmark points (x, y) pairs
    for idx in range(num):
        truths = targets[idx][:, :4].data  # gt bboxes (x1, y1, x2, y2)
        truths_five = targets[idx][:, 5:].data  # gt landmark coordinates (10 values)
        labels = targets[idx][:, 4].data  # gt class labels
        defaults = priors.data
        # match_ writes into loc_t / conf_t / loc_five in place for image idx.
        match_(self.threshold, truths, truths_five, defaults, self.variance,
               labels, loc_t, conf_t, loc_five, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        loc_five = loc_five.cuda()
    pos = conf_t > 0  # positive priors mask, shape (num, num_priors)
    # NOTE(review): leftover debug print — candidate for removal.
    print("pos conf_t:", conf_t.shape, pos.shape, pos.sum())
    # Localization loss (Smooth L1) over positives only.
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(
        loc_data)  # broadcast mask to (num, num_priors, 4)
    pos_idx_five = pos.unsqueeze(pos.dim()).expand_as(loc_five_data)
    loc_p = loc_data[pos_idx].view(-1, 4)  # predicted offsets for positives
    loc_t = loc_t[pos_idx].view(
        -1, 4)  # matched target offsets for the same positives
    loc_five = loc_five[pos_idx_five].view(-1, 10)  # landmark targets
    loc_f = loc_five_data[pos_idx_five].view(-1, 10)  # landmark predictions
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
    # Landmark loss uses MSE (not Smooth L1) — intentional per original code.
    loss_coords = F.mse_loss(loc_five, loc_f, reduction='sum')
    # Confidence loss with hard negative mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    # Per-prior softmax cross-entropy surrogate used only to rank negatives.
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    # Hard Negative Mining
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    # Double argsort yields each prior's rank by descending loss.
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    # Keep negpos_ratio negatives per positive, capped below the prior count.
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    # Confidence loss over positives plus mined negatives.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    # NOTE(review): leftover diagnostic for the empty-selection edge case.
    if conf_p.shape[0] == 0:
        print(pos_idx.shape, neg_idx.shape, conf_p)
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
    # L(x,c,l,g) = (Lconf + alpha*Lloc + beta*Lcoords) / N; guard N >= 1.
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    loss_coords /= N
    return loss_l, loss_c, loss_coords
def forward(self, preds, targets):
    """Multibox loss (SSD) with class-weighted cross entropy.

    Args:
        preds (tuple): (loc preds, conf preds, priors) from the net.
            loc shape: torch.Size(batch_size, num_priors, 4)
            conf shape: torch.Size(batch_size, num_priors, num_classes)
            priors shape: torch.Size(num_priors, 4)
        targets: list of per-image ground truth tensors, shape
            [num_objs, 5] with the label in the last column.

    Returns:
        (loss_l, loss_c): localization and confidence losses, each
        normalized by the number of matched positive priors.
    """
    loc_data, conf_data, priors = preds
    bs = loc_data.size(0)  # batch size
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # Match priors (default boxes) against ground truth boxes; match()
    # fills loc_t (encoded offsets) and conf_t (labels) in place.
    loc_t = torch.Tensor(bs, num_priors, 4)
    conf_t = torch.LongTensor(bs, num_priors)
    for idx in range(bs):
        truths = targets[idx][:, :-1].data
        label = targets[idx][:, -1].data
        defaults = priors.data
        match(self.threshold, truths, label, defaults, self.variance,
              loc_t, conf_t, idx)
    loc_t = loc_t.to(self.device)
    conf_t = conf_t.to(self.device)

    pos = conf_t > 0  # positive priors mask
    num_pos = pos.sum(dim=1, keepdim=True)

    # Localization Loss (Smooth L1) over positives only.
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Compute max conf across batch for hard negative mining:
    # when classes are hard to separate, loss_c is large.
    batch_conf = conf_data.view(-1, num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining: rank negatives by loss, keep the top ones.
    loss_c = loss_c.view(bs, -1)
    loss_c[pos] = 0  # filter out pos boxes for now
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(
        1)  # smaller rank == larger loss
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p,
                             targets_weighted,
                             weight=self.weight,
                             reduction='sum')

    # Sum of losses: L(x, c, l, g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    # FIX: guard against N == 0 (a batch with no positives previously
    # produced a division by zero / NaN); consistent with the sibling
    # variants that use max(..., 1).
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, targets):
    """Multibox loss (SSD).

    Args:
        predictions (tuple): (loc preds, conf preds, priors).
            loc_data: torch.Size([batch, 8732, 4])
            conf_data: torch.Size([batch, 8732, num_classes])
            priors: torch.Size([8732, 4])
        targets: list of per-image gt tensors, [num_objs, 5]
            (bbox in the first 4 columns, label in the last).

    Returns:
        (loss_l, loss_c): localization and confidence losses,
        normalized by the number of matched positives.
    """
    # Regression preds, confidences, prior boxes.
    loc_data, conf_data, priors = predictions
    num = loc_data.size(0)  # batch size
    priors = priors[:loc_data.size(1), :]  # all prior boxes
    num_priors = (priors.size(0))

    # Buffers filled in place by match(): encoded offsets + labels.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        priors = priors.cuda()
    for idx in range(num):
        truths = targets[idx][:, :-1]  # gt boxes of one image
        labels = targets[idx][:, -1]  # gt labels of one image
        defaults = priors
        # Assign each gt its best prior and each prior its best gt.
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    # Variable wrapping kept for backward compatibility (no-op in
    # modern PyTorch).
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    # conf_t > 0 marks priors matched to an object (positives).
    pos = conf_t > 0
    num_pos = pos.sum(dim=1, keepdim=True)  # positives per image

    # Localization loss (Smooth L1) on positives only.
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)  # predicted offsets
    loc_t = loc_t[pos_idx].view(-1, 4)  # target offsets
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior classification loss surrogate used to rank negatives.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    loss_c = loss_c.view(num, -1)
    loss_c[pos] = 0  # positives excluded from mining

    # Hard negative mining: rank by descending loss via double argsort.
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    # Cap negatives at negpos_ratio x positives.
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives + mined negatives.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # FIX: guard against N == 0 (previously divided by zero when a
    # batch had no positive priors); consistent with sibling variants.
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds and conf preds
            from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
        priors shape: torch.size(num_priors,4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
    Returns:
        (loss_l, loss_c): localization and confidence losses, each
        normalized by the number of matched positives (>= 1).
    """
    loc_data, conf_data = predictions  # (bs, num_priors, 4) / (bs, num_priors, num_classes)
    priors = priors  # (num_priors, 4)
    num = loc_data.size(0)  # batch_size
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # Match predicted boxes with gt boxes for every image in the batch;
    # match() fills gt offsets (loc_t) and gt labels (conf_t) in place.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data  # first 4 columns: gt bbox
        labels = targets[idx][:, -1].data  # last column: gt label
        defaults = priors.data  # SSD default boxes
        # Matches each gt to its best prior and each prior to its best gt,
        # writing encoded offsets ([g_cxcy, g_wh]) and top labels.
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets (no-op in modern PyTorch, kept for compatibility)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0  # positives: only these contribute to loc loss

    # Localization Loss (Smooth L1); loc shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)  # predicted offsets (positives)
    loc_t = loc_t[pos_idx].view(-1, 4)  # gt offsets (positives)
    # FIX: size_average=False is deprecated; reduction='sum' is the exact
    # equivalent and is what the sibling variants in this file use.
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Compute max conf across batch for hard negative mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining: consider only negatives, rank by loss.
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)  # group per image
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)  # positives per image
    # Keep negpos_ratio (e.g. 1:3) negatives per positive.
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives + mined hard negatives only.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    # FIX: same deprecation replacement as above (identical behavior).
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)  # N: matched default boxes
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds and conf preds
            from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
        priors shape: torch.size(num_priors,4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).

    Returns:
        (loss_l, loss_c): localization and confidence losses, each
        normalized by the positive-prior count (guarded to be >= 1).
    """
    loc_data, conf_data = predictions
    # loc_data e.g. torch.Size([64, 21824, 4]); conf_data e.g. ([64, 21824, 2])
    priors = priors  # no-op self-assignment kept from original
    # priors e.g. torch.Size([21824, 4]), normalized coordinates
    num = loc_data.size(0)  # batch size
    num_priors = (priors.size(0)
                  )  # total number of anchors
    # Buffers filled in-place by match(): encoded offsets + labels.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(
        num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data  # gt boxes
        labels = targets[idx][:, -1].data  # gt labels
        defaults = priors.data
        # match() assigns each prior its best gt (0 = background when the
        # best overlap is below self.threshold) and writes loc_t / conf_t.
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # conf_t: torch.Size([num, num_priors]); loc_t: ([num, num_priors, 4])
    pos = conf_t > 0  # positives mask; typically very sparse
    # Localization Loss (Smooth L1) over positives only.
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(
        loc_data)  # mask broadcast over the 4 box coords
    loc_p = loc_data[pos_idx].view(-1, 4)
    # loc_p: (num_positive, 4) predicted offsets
    loc_t = loc_t[pos_idx].view(-1, 4)  # matched target offsets
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
    # Classification: compute per-prior loss for hard negative mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    # batch_conf: (num*num_priors, num_classes) raw logits
    # log_sum_exp(x) - x[label] == softmax cross-entropy per prior.
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    # Hard Negative Mining
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    # Double argsort: idx_rank is each prior's rank by descending loss.
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    # negpos_ratio negatives per positive, capped below prior count.
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)  # guard against zero positives
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, targets):
    """Multibox loss (SSD).

    Args:
        predictions (tuple): (loc preds, conf preds, priors).
        targets: list of per-image gt tensors, [num_objs, 5]
            (bbox in the first 4 columns, label in the last).

    Returns:
        (loss_l, loss_c): localization and confidence losses,
        normalized by the number of matched positives.
    """
    # Regression preds, confidences, prior boxes.
    loc_data, conf_data, priors = predictions
    num = loc_data.size(0)  # batch size
    priors = priors[:loc_data.size(1), :]  # all prior boxes
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # Buffers filled in place by match().
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data  # gt boxes
        labels = targets[idx][:, -1].data  # gt labels
        defaults = priors.data  # prior boxes
        # Assign priors to gt boxes (encoded offsets + labels).
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    # if self.use_gpu:
    #     loc_t = loc_t.cuda()
    #     conf_t = conf_t.cuda()

    # conf_t > 0 marks priors that contain an object.
    pos = conf_t > 0
    num_pos = pos.sum(dim=1, keepdim=True)  # positives per image

    # Regression loss (Smooth L1), positives only — background priors
    # carry no box target.
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Classification loss surrogate per prior (softmax cross-entropy),
    # used to rank negatives for mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    loss_c = loss_c.view(num, -1)
    loss_c[pos] = 0  # positives excluded from mining

    # Hard negative mining via double argsort (rank by descending loss).
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    # Balance positives/negatives at 1:negpos_ratio — out of ~8732
    # priors there may be only a handful of positives.
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives + mined negatives.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # FIX: guard against N == 0 (previously divided by zero when a
    # batch had no positive priors); consistent with sibling variants.
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets): """Multibox Loss Args: predictions (tuple): A tuple containing loc preds, conf preds, and prior boxes from SSD net. conf shape: torch.size(batch_size,num_priors,num_classes) loc shape: torch.size(batch_size,num_priors,4) priors shape: torch.size(num_priors,4) ground_truth (tensor): Ground truth boxes and labels for a batch, shape: [batch_size,num_objs,5] (last idx is the label). """ # predictions.shape == ([32, 16800, 4]), ([32, 16800, 2]), ([32, 16800, 10]) # priors.shape == [16800, 4] # len(targets) == 32, targets[0].shape == [num_of_faces_in_this_image, 15], 15 = 4(bbox) + 5*2(lmk) + 1(have_lmk) loc_data, conf_data, landm_data = predictions priors = priors num = loc_data.size(0) # 32 batch num_priors = (priors.size(0)) # 16800 anchor # match priors (default boxes) and ground truth boxes loc_t = torch.Tensor(num, num_priors, 4) landm_t = torch.Tensor(num, num_priors, 10) conf_t = torch.LongTensor(num, num_priors) for idx in range(num): truths = targets[idx][:, :4].data # bbox labels = targets[idx][:, -1].data # have_landmark landms = targets[idx][:, 4:14].data # everyone's lmk defaults = priors.data match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx) if GPU: loc_t = loc_t.cuda() conf_t = conf_t.cuda() landm_t = landm_t.cuda() # print(loc_t.shape) # [32, 16800, 4] # print(conf_t.shape) # [32, 16800] # print(landm_t.shape) # [32, 16800, 10] # exit() # LANDMARK LOSS zeros = torch.tensor(0).cuda() # landm Loss (Smooth L1) # Shape: [batch,num_priors,10] pos1 = conf_t > zeros num_pos_landm = pos1.long().sum(1, keepdim=True) N1 = max(num_pos_landm.data.sum().float(), 1) pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data) landm_p = landm_data[pos_idx1].view(-1, 10) # print(landm_p.shape) # [xxxx,10] landm_t = landm_t[pos_idx1].view(-1, 10) # print(landm_t.shape) # [xxxx,10] loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum') # print(loss_landm) # 
xxxxx.xxxx # LOCALIZATION LOSS pos = conf_t != zeros conf_t[pos] = 1 # Localization Loss (Smooth L1) # Shape: [batch,num_priors,4] pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) loc_p = loc_data[pos_idx].view(-1, 4) loc_t = loc_t[pos_idx].view(-1, 4) loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') # CLASSIFICATION LOSS # Compute max conf across batch for hard negative mining batch_conf = conf_data.view(-1, self.num_classes) loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) # Hard Negative Mining loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now loss_c = loss_c.view(num, -1) _, loss_idx = loss_c.sort(1, descending=True) _, idx_rank = loss_idx.sort(1) num_pos = pos.long().sum(1, keepdim=True) num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) neg = idx_rank < num_neg.expand_as(idx_rank) # Confidence Loss Including Positive and Negative Examples pos_idx = pos.unsqueeze(2).expand_as(conf_data) neg_idx = neg.unsqueeze(2).expand_as(conf_data) conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) targets_weighted = conf_t[(pos+neg).gt(0)] loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N N = max(num_pos.data.sum().float(), 1) loss_l /= N loss_c /= N loss_landm /= N1 return loss_l, loss_c, loss_landm
def forward(self, predictions, priors, targets):
    """Multibox Loss with an auxiliary objectness branch.

    Args:
        predictions (tuple): (loc preds, conf preds, objectness preds).
            loc shape: torch.size(batch_size, num_priors, 4)
            conf shape: torch.size(batch_size, num_priors, num_classes)
            obj shape: torch.size(batch_size, num_priors, 2)
        priors shape: torch.size(num_priors, 4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (loss_l, loss_c, loss_obj): box, class and (0.4-weighted)
        objectness losses.
    """
    loc_data, conf_data, obj_data = predictions
    priors = priors
    num = loc_data.size(0)  # batch size
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # Match priors (default boxes) and ground truth boxes; obj_t is a
    # binarized copy of conf_t (object vs. background).
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    obj_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    obj_t = conf_t.clone()
    obj_t[conf_t > 0] = 1
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        obj_t = obj_t.cuda()
    # wrap targets (no-op in modern PyTorch, kept for compatibility)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)
    obj_t = Variable(obj_t, requires_grad=False)

    # Detached objectness score (P(object)) used only to gate positives.
    obj_conf_data = obj_data[:, :, 1].detach()

    pos = conf_t > 0
    neg_positive = obj_conf_data < self.obj_score
    # Per the truth table below, a prior is dropped from the positives
    # only when it is a matched positive AND its objectness score is
    # below obj_score (both flags == 1, byte sum == 2).
    # FIX: original used `> 2`, which a {0,1}+{0,1} sum can never
    # satisfy, so the filter was dead code; `> 1` implements the table.
    neg_positive = (pos + neg_positive) > 1
    pos = pos - neg_positive
    # for pos:          conf_t > 0            --> 1
    # for neg_positive: obj_conf < obj_score  --> 1
    #   1 1 -> 0   (matched but low objectness: drop)
    #   1 0 -> 1   (keep)
    #   0 1 -> 0
    #   0 0 -> 0
    # Fallback: if gating removed every positive, use the raw matches.
    if pos.data.long().sum() == 0:
        pos = conf_t > 0

    # Localization Loss (Smooth L1); shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

    # Objectness loss with its own hard negative mining.
    pos_obj = obj_t > 0
    batch_obj_conf = obj_data.view(-1, 2)
    loss_obj = log_sum_exp(batch_obj_conf) - batch_obj_conf.gather(
        1, obj_t.view(-1, 1))
    # FIX: loss_obj is (num*num_priors, 1) here, so the mask must be
    # reshaped to (-1, 1) — same pattern as loss_c below; the original
    # indexed with the (num, num_priors) mask, a shape mismatch.
    loss_obj[pos_obj.view(-1, 1)] = 0
    loss_obj = loss_obj.view(num, -1)
    _, loss_obj_idx = loss_obj.sort(1, descending=True)
    _, idx_obj_rank = loss_obj_idx.sort(1)
    num_obj_pos = pos_obj.long().sum(1, keepdim=True)
    num_obj_neg = torch.clamp(self.negpos_ratio * num_obj_pos,
                              max=pos_obj.size(1) - 1)
    neg_obj = idx_obj_rank < num_obj_neg.expand_as(idx_obj_rank)
    pos_obj_idx = pos_obj.unsqueeze(2).expand_as(obj_data)
    neg_obj_idx = neg_obj.unsqueeze(2).expand_as(obj_data)
    conf_obj_p = obj_data[(pos_obj_idx + neg_obj_idx).gt(0)].view(-1, 2)
    targets_weighted = obj_t[(pos_obj + neg_obj).gt(0)]
    loss_obj = F.cross_entropy(conf_obj_p,
                               targets_weighted,
                               size_average=False)

    # Compute max conf across batch for hard negative mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    # Hard Negative Mining
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum(), 1)
    loss_l /= N * 1.0
    loss_c /= N * 1.0
    # Objectness normalized by mined-negative count, down-weighted 0.4.
    N1 = max(num_obj_neg.data.sum(), 1)
    loss_obj /= N1
    loss_obj = 0.4 * loss_obj
    return loss_l, loss_c, loss_obj
def forward(self, predictions, priors, targets):
    """Multibox Loss with a binary (object/background) confidence head
    combined with a multiclass head, plus per-class loss reweighting.

    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and binary conf preds from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            bin_conf: torch.size(batch_size,num_priors,2)
        priors shape: torch.size(num_priors,4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).

    Returns:
        (loss_l, loss_cls, loss_b, pos, neg_binary, neg_multi):
        the three normalized losses plus the positive mask and the two
        mined-negative masks (binary and multiclass branches).
    """
    loc_data, conf_data, bin_conf_data = predictions
    priors = priors  # no-op self-assignment kept from original
    num = loc_data.size(0)  # batch size
    num_priors = (priors.size(0))
    num_classes = self.num_classes
    # Buffers filled in-place by match_3_terms(): offsets, multiclass
    # labels and binary (object/background) labels.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    bin_conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        match_3_terms(self.threshold, truths, defaults, self.variance,
                      labels, loc_t, conf_t, bin_conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        bin_conf_t = bin_conf_t.cuda()
    # wrap targets (no-op in modern PyTorch, kept for compatibility)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)
    bin_conf_t = Variable(bin_conf_t, requires_grad=False)

    # Positives are defined on the BINARY target here, not conf_t.
    pos = bin_conf_t > 0

    # Localization Loss (Smooth L1), kept per-box ('none' + sum over
    # coords) so each box can be reweighted by its class below.
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l_elements = F.smooth_l1_loss(loc_p, loc_t,
                                       reduction='none').sum(1)
    # Inverse-frequency per-class weights computed from this batch...
    conf_t_label = conf_t[conf_t > 0]
    conf_t_stat = torch.zeros(num_classes).cuda()
    for index in range(1, num_classes):
        conf_t_stat[index] = (conf_t_label == index).sum()
    conf_t_weights = torch.zeros(num_classes).cuda()
    for index in range(1, num_classes):
        if conf_t_stat[index] > 0:
            conf_t_weights[index] = 1. / conf_t_stat[index]
    loss_l = 0
    # NOTE(review): the inverse-frequency weights above and the fixed
    # exp() table below are both overwritten by ones — experimental
    # weighting left disabled; the net effect is uniform weights.
    conf_t_weights = torch.exp(
        torch.tensor([
            2, 1.3930, 1.2377, 2.3323, 1.7176, 2.0606, 1.3950, 1.0631,
            1.7324, 1.3473, 1.6245, 1.1843, 2.2063, 1.3083, 1.3608,
            1.2330, 1.8409, 1.6459, 1.3869, 1.4504, 1.2606
        ])).cuda()
    conf_t_weights = torch.ones(21).cuda()  # active: uniform weights
    for index in range(1, num_classes):
        loss_l += conf_t_weights[index] * (
            loss_l_elements[conf_t_label == index]).sum()

    # Binary confidence branch: per-prior loss for hard negative mining.
    batch_bin_conf = bin_conf_data.view(-1, 2)
    loss_bin = log_sum_exp(batch_bin_conf) - batch_bin_conf.gather(
        1, bin_conf_t.view(-1, 1))
    # Hard Negative Mining
    loss_bin[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_bin = loss_bin.view(num, -1)
    _, loss_idx = loss_bin.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    neg_binary = neg  # returned so callers can reuse the mined mask

    # Binary confidence Loss Including Positive and Negative Examples,
    # kept per-element for class-conditional reweighting.
    pos_idx = pos.unsqueeze(2).expand_as(bin_conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(bin_conf_data)
    bin_conf_p = bin_conf_data[(pos_idx + neg_idx).gt(0)].view(-1, 2)
    targets_weighted = bin_conf_t[(pos + neg).gt(0)]
    loss_bin_elements = F.cross_entropy(bin_conf_p,
                                        targets_weighted,
                                        reduction='none')
    # Per-class inverse-frequency weights over the selected priors
    # (index 0 = background included here)...
    conf_t_f_label_for_bin = conf_t[(pos + neg) > 0]
    conf_t_f_stat_for_bin = torch.zeros(num_classes).cuda()
    for index in range(0, num_classes):
        conf_t_f_stat_for_bin[index] = (
            conf_t_f_label_for_bin == index).sum()
    conf_t_f_weights_for_bin = torch.zeros(num_classes).cuda()
    for index in range(0, num_classes):
        if conf_t_f_stat_for_bin[index] > 0:
            conf_t_f_weights_for_bin[
                index] = 1. / conf_t_f_stat_for_bin[index]
    loss_b = 0
    # NOTE(review): again overwritten — active weighting is ones with
    # background (index 0) upweighted x3.
    conf_t_f_weights_for_bin = torch.exp(
        torch.tensor([
            2, 1.3930, 1.2377, 2.3323, 1.7176, 2.0606, 1.3950, 1.0631,
            1.7324, 1.3473, 1.6245, 1.1843, 2.2063, 1.3083, 1.3608,
            1.2330, 1.8409, 1.6459, 1.3869, 1.4504, 1.2606
        ])).cuda()
    conf_t_f_weights_for_bin = torch.ones(21).cuda()
    conf_t_f_weights_for_bin[0] *= 3
    for index in range(0, num_classes):
        loss_b += conf_t_f_weights_for_bin[index] * (
            loss_bin_elements[conf_t_f_label_for_bin == index]).sum()

    # Combine binary + multiclass logits into full-class logits P_logit:
    # P_k = class-k logit + object logit; P_0 = background logit +
    # log-sum-exp of the class logits (soft "any class" mass).
    batch_bin_conf = bin_conf_data.view(-1, 2)
    batch_conf = conf_data.view(-1, self.num_classes - 1)
    P_k = (batch_conf[:, ].t() + batch_bin_conf[:, 1]).t()
    P_0 = batch_bin_conf[:, 0].unsqueeze(1) + torch.log(
        torch.exp(batch_conf).sum(dim=1, keepdim=True))
    P_logit = torch.cat((P_0, P_k), dim=1).view(num, -1,
                                                self.num_classes)
    # Compute max conf across batch for hard negative mining.
    batch_P_logit = P_logit.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_P_logit) - batch_P_logit.gather(
        1, conf_t.view(-1, 1))
    # Hard Negative Mining (multiclass branch); note neg_multi is only
    # returned — the final loss_cls below uses positives only.
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    neg_multi = neg

    # Multiclass confidence loss over POSITIVES only, per-element for
    # class-conditional reweighting.
    pos_idx = pos.unsqueeze(2).expand_as(P_logit)
    conf_p = P_logit[(pos_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos).gt(0)]
    loss_mul_elements = F.cross_entropy(conf_p,
                                        targets_weighted,
                                        reduction='none')
    conf_t_f_label_for_mul = conf_t[(pos) > 0]
    conf_t_f_stat_for_mul = torch.zeros(num_classes).cuda()
    for index in range(1, num_classes):
        conf_t_f_stat_for_mul[index] = (
            conf_t_f_label_for_mul == index).sum()
    conf_t_f_weights_for_mul = torch.zeros(num_classes).cuda()
    for index in range(1, num_classes):
        if conf_t_f_stat_for_mul[index] > 0:
            conf_t_f_weights_for_mul[
                index] = 1. / conf_t_f_stat_for_mul[index]
    loss_cls = 0
    # NOTE(review): overwritten by ones — uniform weighting is active.
    conf_t_f_weights_for_mul = torch.exp(
        torch.tensor([
            2, 1.3930, 1.2377, 2.3323, 1.7176, 2.0606, 1.3950, 1.0631,
            1.7324, 1.3473, 1.6245, 1.1843, 2.2063, 1.3083, 1.3608,
            1.2330, 1.8409, 1.6459, 1.3869, 1.4504, 1.2606
        ])).cuda()
    conf_t_f_weights_for_mul = torch.ones(21).cuda()
    for index in range(1, num_classes):
        loss_cls += conf_t_f_weights_for_mul[index] * (
            loss_mul_elements[conf_t_f_label_for_mul == index]).sum()

    # Sum of losses:
    # L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g) + βLbinconf(x, c)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_cls /= N
    loss_b /= N
    return loss_l, loss_cls, loss_b, pos, neg_binary, neg_multi
def forward(self, predictions, priors, targets):
    """Compute the SSD MultiBox loss (localization + confidence).

    Args:
        predictions (tuple): (loc_data, conf_data) from the SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape:  torch.size(batch_size, num_priors, 4)
        priors (tensor): default boxes, shape torch.size(num_priors, 4).
        targets (tensor): ground-truth boxes and labels for a batch,
            shape [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (loss_l, loss_c): localization and confidence losses, each
        normalized by the total number of matched (positive) priors.
    """
    loc_data, conf_data = predictions
    num = loc_data.size(0)
    num_priors = priors.size(0)
    num_classes = self.num_classes

    # Match priors (default boxes) and ground-truth boxes; match()
    # fills loc_t / conf_t in place for each image in the batch.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0  # positives: priors matched to a real object

    # Localization Loss (Smooth L1) over positive priors only.
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # reduction='sum' replaces the long-deprecated size_average=False
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior softmax loss, used only to rank hard negatives.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

    # Hard Negative Mining: zero out positives, then keep the
    # negpos_ratio hardest negatives per image (rank via double argsort).
    loss_c[pos.view(-1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g)) / N.
    # Fix: clamp N to 1 so a batch with no positives cannot divide by
    # zero (the original used an unguarded num_pos.data.sum()).
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, targets, use_arm=False, filter_object=False,
            filter_score=0, debug=False):
    """Multibox Loss (RefineDet-style, ARM or ODM branch).

    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
        and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).

    Note: filter_score is currently unused in this implementation.
    """
    # arm_loc_data, arm_conf_data, loc_data, conf_data, priors = predictions
    # ODM training consumes the ARM outputs; ARM training ignores them.
    if use_arm:
        arm_loc_data, arm_conf_data, loc_data, conf_data, priors = predictions
    else:
        loc_data, conf_data, _, _, priors = predictions
    num = loc_data.size(0)
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    defaults = priors.data
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        # binary (object vs. background) mode collapses all classes to 1
        if self.num_classes == 2:
            labels = labels > 0
        if use_arm:
            # refine_match decodes priors through the ARM loc output before
            # matching; bbox_weight is unused here (use_weight=False)
            bbox_weight = refine_match(self.threshold, truths, defaults,
                                       self.variance, labels, loc_t, conf_t,
                                       idx, arm_loc_data[idx].data,
                                       use_weight=False)
        else:
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    if use_arm and filter_object:
        # drop positives whose ARM objectness score is too low
        P = F.softmax(arm_conf_data, 2)
        arm_conf_data_temp = P[:, :, 1]
        object_score_index = arm_conf_data_temp <= self.object_score
        pos = conf_t > 0
        pos[object_score_index.detach()] = 0
    else:
        pos = conf_t > 0
    num_pos = pos.sum(1, keepdim=True)

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    if debug:
        if use_arm:
            print("odm pos num: ", str(loc_t.size(0)), str(loc_t.size(1)))
        else:
            print("arm pos num", str(loc_t.size(0)), str(loc_t.size(1)))
    loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    # double argsort: idx_rank is each prior's rank by descending loss
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

    # NOTE(review): N can be zero when a batch has no positives, which
    # would make these divisions fail — presumably callers guarantee at
    # least one match per batch. TODO confirm.
    N = num_pos.data.sum()
    loss_l /= float(N)
    loss_c /= float(N)
    return loss_l, loss_c
def forward(self, predictions, targets):
    """Compute MultiBox loss with optional OHEM hard negative mining.

    Args:
        predictions (tuple): (loc_data, conf_data, priors) from the net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape:  torch.size(batch_size, num_priors, 4)
            priors shape: torch.size(num_priors, 4)
        targets (tensor): ground-truth boxes/labels per image,
            shape [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (loss_l, loss_c) normalized by the number of positives.
    """
    loc_data, conf_data, priors = predictions
    num = loc_data.size(0)
    priors = priors[:loc_data.size(1), :]
    num_priors = priors.size(0)
    num_classes = self.num_classes

    # Build learning targets by matching priors against ground truth;
    # match() fills loc_t / conf_t in place.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        # binary detection collapses all object classes to label 1
        if self.num_classes == 2:
            labels = labels > 0
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    loc_t = loc_t.cuda()
    conf_t = conf_t.cuda()

    pos = conf_t > 0
    num_pos = pos.sum(1, keepdim=True)

    if self.OHEM:
        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_hard = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        # Hard Negative Mining: drop positives, rank the rest by loss.
        loss_hard[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_hard = loss_hard.view(num, -1)
        _, loss_idx = loss_hard.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        if num_pos.data.sum() > 0:
            num_neg = torch.clamp(self.negpos_ratio * num_pos,
                                  max=pos.size(1) - 1)
        else:
            # No positives at all: fall back to a fixed negative budget.
            # Fix: sized by the actual batch, not a hard-coded 32.
            fake_num_pos = torch.ones(num, 1).long() * 15
            num_neg = torch.clamp(self.negpos_ratio * fake_num_pos,
                                  max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
    else:
        # Fix: the original referenced conf_p here, which is undefined in
        # this branch (NameError), and passed the unflattened conf_t.
        # Score every prior against its matched target instead.
        loss_c = F.cross_entropy(conf_data.view(-1, self.num_classes),
                                 conf_t.view(-1), reduction='sum')

    # Localization Loss (Smooth L1), positives only (if any exist).
    # Shape: [batch,num_priors,4]
    if num_pos.data.sum() > 0:
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
        N = num_pos.data.sum()
    else:
        # Fix: keep the zero loss on the same device as the predictions
        # (the original torch.zeros(1) lived on the CPU).
        loss_l = torch.zeros(1, device=loc_data.device)
        N = 1.0
    loss_l /= float(N)
    loss_c /= float(N)
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    #--------------------------------------------------------------------#
    #   Unpack the three predictions: box regression, class confidence,
    #   and facial-landmark regression.
    #--------------------------------------------------------------------#
    loc_data, conf_data, landm_data = predictions
    #--------------------------------------------------#
    #   Batch size and number of prior boxes.
    #--------------------------------------------------#
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    #--------------------------------------------------#
    #   Allocate the target buffers filled in by match().
    #--------------------------------------------------#
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        # ground-truth boxes, labels and landmarks for this image
        truths = targets[idx][:, :4].data
        labels = targets[idx][:, -1].data
        landms = targets[idx][:, 4:14].data
        # prior (default) boxes
        defaults = priors.data
        #--------------------------------------------------#
        #   Match ground truth against the priors: a prior with
        #   sufficient overlap is assigned to detect that ground
        #   truth (match() writes the targets in place).
        #--------------------------------------------------#
        match(self.threshold, truths, defaults, self.variance, labels,
          landms, loc_t, conf_t, landm_t, idx)
    #--------------------------------------------------#
    #   Move the targets to the GPU when enabled.
    #   loc_t   (num, num_priors, 4)
    #   conf_t  (num, num_priors)
    #   landm_t (num, num_priors, 10)
    #--------------------------------------------------#
    zeros = torch.tensor(0)
    if self.cuda:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        landm_t = landm_t.cuda()
        zeros = zeros.cuda()
    #------------------------------------------------------------------------#
    #   Faces WITH landmarks carry label 1; faces WITHOUT landmarks
    #   carry label -1. Hence the landmark loss uses pos1 = conf_t > zeros
    #   while the box loss uses pos = conf_t != zeros.
    #------------------------------------------------------------------------#
    pos1 = conf_t > zeros
    pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
    landm_p = landm_data[pos_idx1].view(-1, 10)
    landm_t = landm_t[pos_idx1].view(-1, 10)
    loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')
    pos = conf_t != zeros
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
    #--------------------------------------------------#
    #   batch_conf (num * num_priors, 2)
    #   loss_c     (num, num_priors)
    #--------------------------------------------------#
    # collapse the -1 "no-landmark" label to 1 for classification
    conf_t[pos] = 1
    batch_conf = conf_data.view(-1, self.num_classes)
    # this ranks the priors by how hard they are to classify
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
    # hard-negative mining ignores positives: only hard negatives count
    loss_c[pos.view(-1, 1)] = 0
    loss_c = loss_c.view(num, -1)
    #--------------------------------------------------#
    #   loss_idx (num, num_priors)
    #   idx_rank (num, num_priors)
    #--------------------------------------------------#
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    #--------------------------------------------------#
    #   Count the positives per image, then cap the mined
    #   negatives at negpos_ratio times that count.
    #   num_pos (num, )
    #   neg     (num, num_priors)
    #--------------------------------------------------#
    num_pos = pos.long().sum(1, keepdim=True)
    # limit the number of negative samples
    num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    #--------------------------------------------------#
    #   Expand the masks to the class dimension.
    #   pos_idx (num, num_priors, num_classes)
    #   neg_idx (num, num_priors, num_classes)
    #--------------------------------------------------#
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    # classification loss over the selected positives and negatives
    conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
    targets_weighted = conf_t[(pos+neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
    # normalize by the positive count, clamped to avoid division by zero
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    # landmarks are normalized by their own positive count
    num_pos_landm = pos1.long().sum(1, keepdim=True)
    N1 = max(num_pos_landm.data.sum().float(), 1)
    loss_landm /= N1
    return loss_l, loss_c, loss_landm
def forward(self, predictions, priors, targets):
    """RetinaFace-style loss: box regression, classification with hard
    negative mining, and facial-landmark regression.

    Priors are matched per image against the ground truth, Smooth-L1 is
    summed over positives, and negatives are mined at `negpos_ratio`
    negatives per positive within each image.
    """
    box_preds, cls_preds, landm_preds = predictions
    batch_size = box_preds.size(0)
    n_priors = priors.size(0)

    # Per-image matching of priors to ground truth; match() fills the
    # target buffers in place.
    box_targets = torch.Tensor(batch_size, n_priors, 4)
    landm_targets = torch.Tensor(batch_size, n_priors, 10)
    cls_targets = torch.LongTensor(batch_size, n_priors)
    for img in range(batch_size):
        gt_boxes = targets[img][:, :4].data
        gt_labels = targets[img][:, -1].data
        gt_landms = targets[img][:, 4:14].data
        match(self.threshold, gt_boxes, priors.data, self.variance,
              gt_labels, gt_landms, box_targets, cls_targets,
              landm_targets, img)

    zeros = torch.tensor(0)
    if self.cuda:
        box_targets = box_targets.cuda()
        cls_targets = cls_targets.cuda()
        landm_targets = landm_targets.cuda()
        zeros = zeros.cuda()

    # Landmark loss (Smooth L1): only priors with label > 0 carry
    # landmark annotations.
    landm_pos = cls_targets > zeros
    n_landm_pos = landm_pos.long().sum(1, keepdim=True)
    landm_norm = max(n_landm_pos.data.sum().float(), 1)
    landm_mask = landm_pos.unsqueeze(-1).expand_as(landm_preds)
    pred_landms = landm_preds[landm_mask].view(-1, 10)
    gt_landm = landm_targets[landm_mask].view(-1, 10)
    loss_landm = F.smooth_l1_loss(pred_landms, gt_landm, reduction='sum')

    # Box loss (Smooth L1): every non-background prior counts; its
    # label is collapsed to 1 for the classification target.
    obj_pos = cls_targets != zeros
    cls_targets[obj_pos] = 1
    box_mask = obj_pos.unsqueeze(-1).expand_as(box_preds)
    pred_boxes = box_preds[box_mask].view(-1, 4)
    gt_boxes_sel = box_targets[box_mask].view(-1, 4)
    loss_l = F.smooth_l1_loss(pred_boxes, gt_boxes_sel, reduction='sum')

    # Rank every prior by its softmax loss for hard negative mining.
    flat_cls = cls_preds.view(-1, self.num_classes)
    loss_c = log_sum_exp(flat_cls) - flat_cls.gather(1, cls_targets.view(-1, 1))
    loss_c[obj_pos.view(-1, 1)] = 0  # positives never compete as negatives
    loss_c = loss_c.view(batch_size, -1)
    _, order = loss_c.sort(1, descending=True)
    _, rank = order.sort(1)
    n_pos = obj_pos.long().sum(1, keepdim=True)
    n_neg = (self.negpos_ratio * n_pos).clamp(max=obj_pos.size(1) - 1)
    hard_neg = rank < n_neg.expand_as(rank)

    # Classification loss over positives plus the mined negatives.
    pos_mask = obj_pos.unsqueeze(2).expand_as(cls_preds)
    neg_mask = hard_neg.unsqueeze(2).expand_as(cls_preds)
    chosen_preds = cls_preds[(pos_mask + neg_mask).gt(0)].view(-1, self.num_classes)
    chosen_targets = cls_targets[(obj_pos + hard_neg).gt(0)]
    loss_c = F.cross_entropy(chosen_preds, chosen_targets, reduction='sum')

    # L(x,c,l,g) = (Lconf + αLloc) / N; landmarks use their own count.
    norm = max(n_pos.data.sum().float(), 1)
    loss_l /= norm
    loss_c /= norm
    loss_landm /= landm_norm
    return loss_l, loss_c, loss_landm
def forward(self, predictions, targets):
    """Compute the SSD MultiBox loss for one batch.

    Args:
        predictions (tuple): (loc_data, conf_data, priors) from the net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape:  torch.size(batch_size, num_priors, 4)
            priors shape: torch.size(num_priors, 4)
        targets (tensor): ground-truth boxes/labels per image,
            shape [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (loss_l, loss_c) normalized by the number of positives.
    """
    loc_data, conf_data, priors = predictions
    # get batch_size
    num = loc_data.size(0)
    # keep only as many default boxes as the net actually predicted
    priors = priors[:loc_data.size(1), :]
    num_priors = priors.size(0)
    num_classes = self.num_classes

    # Match each image's ground truth against the default boxes;
    # match() writes the regression/classification targets in place.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        # the box
        truths = targets[idx][:, :-1].data
        # the label
        labels = targets[idx][:, -1].data
        # the default boxes
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # convert to Variable
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0  # priors matched to an object

    # Localization loss (Smooth L1) over positives.
    # reduction='sum' replaces the deprecated size_average=False.
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior softmax loss, used to rank hard negatives.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    loss_c = loss_c.view(num, -1)
    loss_c[pos] = 0  # positives never compete as negatives
    # double argsort per image yields each prior's rank by loss
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    # pos-samples num
    num_pos = pos.long().sum(1, keepdim=True)
    # constrain the number of mined negatives per image
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives plus mined negatives.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Fix: guard against a batch with no positives — the original
    # divided by an unguarded num_pos sum, which can be zero.
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """Multibox Loss (face/mask detector with landmark regression).

    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
        and prior boxes from SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)  # n, 3(anchor), 3
            loc shape: torch.size(batch_size,num_priors,4)  # n, 3, 4
                as we only count each anchor for only one cls, not for
                every cls; for rcnn it does this for every cls
            priors shape: torch.size(num_priors, 4)

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size, num_objs, 5] (last idx is the label).
    """
    loc_data, conf_data, landm_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    # match priors (default boxes) and ground truth boxes
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :4].data
        labels = targets[
            idx][:, -1].data  # label only has 1, -1 and 2; 1 and 2 used for cls
        landms = targets[idx][:, 4:14].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              landms, loc_t, conf_t, landm_t, idx)
    if 1:  # use gpu
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        landm_t = landm_t.cuda()
    # because the label file includes -1 landmarks, that data is ignored
    # for landmark regression (pos excludes the -1 label here)
    zeros = torch.tensor(0).cuda()
    pos = conf_t > zeros
    num_pos_landm = pos.long().sum(1, keepdim=True)
    N1 = max(num_pos_landm.data.sum().float(), 1)
    # 1. get index for face class
    face_tensor = torch.tensor(1).cuda()
    # Shape: [batch,num_priors,10]
    face_pos = conf_t == face_tensor
    face_pos_idx = face_pos.unsqueeze(face_pos.dim()).expand_as(
        landm_data)  # e.g. 32, 16800, 10
    face_landm_p = landm_data[face_pos_idx].view(-1, 10)
    face_landm_t = landm_t[face_pos_idx].view(-1, 10)
    # 2. get index for mask class, set all these target landmarks to 0.
    # conf_t holds the matched class per anchor: 1 when the anchor
    # matched a cls-1 box, 2 when it matched a cls-2 box — so mask_pos
    # selects every anchor assigned to class 2.
    mask_tensor = torch.tensor(2).cuda()
    mask_pos = conf_t == mask_tensor
    mask_pos_idx = mask_pos.unsqueeze(mask_pos.dim()).expand_as(landm_data)
    mask_landm_p = landm_data[mask_pos_idx].view(-1, 10)
    mask_landm_t = landm_t[mask_pos_idx].view(-1, 10)
    # zero coordinates 4..9 in both pred and target so masked faces only
    # supervise the first two landmark points (advanced indexing above
    # produced copies, so landm_data itself is not modified)
    mask_landm_p[:, 4:] = 0
    mask_landm_t[:, 4:] = 0
    landm_p = torch.cat([face_landm_p, mask_landm_p], 0)
    landm_t = torch.cat([face_landm_t, mask_landm_t], 0)
    # NOTE(review): default (mean) reduction here, yet loss_landm is also
    # divided by N1 below — looks like double normalization compared to
    # the sum-reduction variants elsewhere in this file. TODO confirm.
    loss_landm = F.smooth_l1_loss(landm_p, landm_t)
    ##############################################################################
    zeros = torch.tensor(0).cuda()
    pos = conf_t != zeros  # now also includes the -1 (no-landmark) label
    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # NOTE(review): mean reduction, later divided by N — see note above.
    loss_l = F.smooth_l1_loss(loc_p, loc_t)
    ##############################################################################
    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    # Note: the label file includes the -1 label, so remap it into the
    # valid 0..2 class range before gathering
    no_landmark_pos = conf_t < zeros
    conf_t[no_landmark_pos] = 1
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    # Hard Negative Mining
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    # NOTE(review): mean-reduction cross entropy also divided by N below.
    loss_c = F.cross_entropy(conf_p, targets_weighted)
    ##############################################################################
    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    loss_landm /= N1
    return loss_l, loss_c, loss_landm
def forward(self, odm_data, priors, targets, arm_data=None, filter_object=False):
    """Multibox Loss for the ODM branch of a RefineDet-style detector.

    Args:
        odm_data (tuple): (loc_data, conf_data) from the ODM branch.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape:  torch.size(batch_size, num_priors, 4)
        priors: prior boxes, shape torch.size(num_priors, 4).
        targets (tensor): ground-truth boxes/labels per image,
            shape [batch_size, num_objs, 5] (last idx is the label).
        arm_data (tuple): optional ARM branch output (arm_loc, arm_conf).
        filter_object: when True, drop positives whose ARM objectness
            score is at or below self.object_score.
    """
    loc_data, conf_data = odm_data
    if arm_data:
        arm_loc, arm_conf = arm_data
    priors = priors.data
    num = loc_data.size(0)
    num_priors = priors.size(0)

    # Match priors (default boxes) and ground truth boxes; refine_match
    # decodes through the ARM loc predictions when that branch is active.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        # for plain objectness detection collapse all classes to 1
        if self.num_classes == 2:
            labels = labels > 0
        if arm_data:
            refine_match(self.threshold, truths, priors, self.variance,
                         labels, loc_t, conf_t, idx, arm_loc[idx].data)
        else:
            match(self.threshold, truths, priors, self.variance, labels,
                  loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    if arm_data and filter_object:
        # discard positives the ARM already scored as background
        arm_conf_data = arm_conf.data[:, :, 1]
        pos = conf_t > 0
        object_score_index = arm_conf_data <= self.object_score
        pos[object_score_index] = 0
    else:
        pos = conf_t > 0

    # Localization Loss (Smooth L1), positives only.
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # reduction='sum' replaces the deprecated size_average=False
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Compute max conf across batch for hard negative mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    # Hard Negative Mining.
    # Fix: reshape to [num, num_priors] BEFORE masking with `pos` — the
    # original indexed the flat [num*num_priors, 1] tensor with the 2-D
    # `pos` mask, which is a shape mismatch (the known ssd.pytorch bug).
    loss_c = loss_c.view(num, -1)
    loss_c[pos] = 0  # filter out pos boxes for now
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # L(x,c,l,g) = (Lconf(x,c) + αLloc(x,l,g)) / N.
    # Fix: clamp N so a batch with no positives cannot divide by zero.
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """SSD MultiBox loss: Smooth-L1 box regression over matched priors
    plus cross-entropy classification with hard negative mining.

    Args:
        predictions (tuple): (loc preds, conf preds) from the net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape:  torch.size(batch_size, num_priors, 4)
        priors: default boxes, shape torch.size(num_priors, 4).
        targets: ground truth per image, [batch_size, num_objs, 5]
            (last column is the class label).
    """
    box_preds, cls_preds = predictions
    batch_size = box_preds.size(0)
    n_priors = priors.size(0)

    # Fill the regression/classification targets image by image;
    # match() mutates the buffers in place.
    box_targets = torch.Tensor(batch_size, n_priors, 4)
    cls_targets = torch.LongTensor(batch_size, n_priors)
    for img in range(batch_size):
        gt_boxes = targets[img][:, :-1].data
        gt_labels = targets[img][:, -1].data
        match(self.threshold, gt_boxes, priors.data, self.variance,
              gt_labels, box_targets, cls_targets, img)
    if GPU:
        box_targets = box_targets.cuda()
        cls_targets = cls_targets.cuda()

    positives = cls_targets > 0

    # Box regression (Smooth L1) restricted to matched priors.
    box_mask = positives.unsqueeze(-1).expand_as(box_preds)
    matched_preds = box_preds[box_mask].view(-1, 4)
    matched_targets = box_targets[box_mask].view(-1, 4)
    loss_l = F.smooth_l1_loss(matched_preds, matched_targets, reduction='sum')

    # Per-prior softmax loss, used only to rank candidate negatives.
    flat_cls = cls_preds.view(-1, self.num_classes)
    loss_c = log_sum_exp(flat_cls) - flat_cls.gather(1, cls_targets.view(-1, 1))

    # Hard negative mining: silence the positives, then the double
    # argsort trick gives each prior's rank by descending loss.
    loss_c[positives.view(-1, 1)] = 0
    loss_c = loss_c.view(batch_size, -1)
    _, order = loss_c.sort(1, descending=True)
    _, rank = order.sort(1)
    n_pos = positives.long().sum(1, keepdim=True)
    n_neg = torch.clamp(self.negpos_ratio * n_pos, max=positives.size(1) - 1)
    negatives = rank < n_neg.expand_as(rank)

    # Classification loss over the union of positives and mined negatives.
    pos_mask = positives.unsqueeze(2).expand_as(cls_preds)
    neg_mask = negatives.unsqueeze(2).expand_as(cls_preds)
    selected_preds = cls_preds[(pos_mask + neg_mask).gt(0)].view(
        -1, self.num_classes)
    selected_targets = cls_targets[(positives + negatives).gt(0)]
    loss_c = F.cross_entropy(selected_preds, selected_targets, reduction='sum')

    # L(x,c,l,g) = (Lconf + αLloc) / N, with N clamped to avoid /0.
    normalizer = max(n_pos.data.sum().float(), 1)
    loss_l /= normalizer
    loss_c /= normalizer
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
        and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
    """
    loc_data, conf_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0)
                  )  # e.g. 11620: default boxes over all feature-map grids
    num_classes = self.num_classes
    # match priors (default boxes) and ground truth boxes
    """
    The loop below turns the raw `targets` into the network's learning
    targets -- the loss targets loc_t and conf_t.
    Note: match() mutates the tensors passed to it in place, which is
    why it returns nothing.
    """
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data  # ground-truth boxes
        labels = targets[idx][:, -1].data  # ground-truth labels
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)  # gt and default boxes
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)
    """
    conf_t > 0 is equivalent to torch.gt(conf_t, 0) or conf_t.gt(0):
    it returns a tensor shaped like conf_t with 1 where the condition
    holds and 0 elsewhere.
    """
    pos = conf_t > 0  # mask of positives; background is ignored
    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)  # predictions
    loc_t = loc_t[pos_idx].view(-1, 4)  # encoded offsets to learn
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
    # Compute max conf across batch for hard negative mining
    """
    conf_data shape:  [batch, num_priors, num_classes]
    batch_conf shape: [batch*num_priors, num_classes]
    because cross_entropy in PyTorch expects a 2-D [N, C] input.
    """
    batch_conf = conf_data.view(-1, self.num_classes)  # predictions
    """
    conf_t has shape [batch, num_priors]; positives hold their class id,
    everything else is 0. Tensor.gather(dim, index) picks along `dim` by
    `index` -- here it extracts the x[class] term of
    loss(x, class) = -x[class] + log(sum_j exp(x[j])).
    """
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))  # used to select negatives
    # Hard Negative Mining
    """
    Zero out the positives' loss first, then sort the per-prior loss
    within each image and keep the top self.negpos_ratio*num_pos
    negatives.
    """
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes so only negatives are ranked
    """
    Next, loss_c is reshaped to [batch, num_priors]; the double argsort
    below is the standard trick for selecting the top-n entries per row.
    """
    loss_c = loss_c.view(num, -1)
    a_, loss_idx = loss_c.sort(1, descending=True)
    b_, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)  # clamp the negative count
    neg = idx_rank < num_neg.expand_as(idx_rank)  # negative-sample index mask
    # Confidence Loss Including Positive and Negative Examples
    """
    The steps above only existed to produce pos_idx and neg_idx.
    conf_data shape: [batch, num_priors, num_classes]
    """
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    """
    (pos_idx+neg_idx).gt(0): the original author guessed this guards
    against overlap between the positive and negative picks -- .gt(0)
    collapses any value greater than 1 back to a boolean selection.
    """
    # gt example: torch.gt(x, 1)  # elementwise "greater than 1"
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c