def forward(self, predictions, targets):
    """Compute the SSD MultiBox loss for one batch.

    Smooth-L1 localization loss on positive default boxes plus
    cross-entropy confidence loss with hard negative mining
    (neg:pos ratio = self.neg_pos); both normalized by the number
    of positive matches.

    Args:
        predictions: tuple (loc_data, conf_data, dbox_list) from the net.
        targets: per-image tensors of shape (num_objs, 5); columns are
            (xmin, ymin, xmax, ymax, label).

    Returns:
        (loss_loc, loss_conf) — both normalized by the positive count N.
    """
    loc_data, conf_data, dbox_list = predictions
    # (batch_num, num_dbox, num_classes)
    num_batch = loc_data.size(0)
    num_dbox = loc_data.size(1)  # 8732
    num_classes = conf_data.size(2)
    conf_t_label = torch.LongTensor(num_batch, num_dbox).to(self.device)
    loc_t = torch.Tensor(num_batch, num_dbox, 4).to(self.device)
    for idx in range(num_batch):
        truths = targets[idx][:, :-1].to(
            self.device)  # (xmin, ymin, xmax, ymax) BBox
        labels = targets[idx][:, -1].to(self.device)  # label
        dbox = dbox_list.to(self.device)
        variances = [0.1, 0.2]
        # match() fills loc_t / conf_t_label in place for image idx.
        match(self.jaccard_threshold, truths, dbox, variances, labels,
              loc_t, conf_t_label, idx)

    # SmoothL1Loss on the positive (matched) default boxes only.
    pos_mask = conf_t_label > 0

    # loc_data (num_batch, 8732, 4)
    pos_idx = pos_mask.unsqueeze(pos_mask.dim()).expand_as(loc_data)

    # gather positive dbox predictions / targets
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_loc = F.smooth_l1_loss(loc_p, loc_t, reduction="sum")

    # Confidence loss (cross-entropy), first per-box with no reduction so
    # the per-box losses can be ranked for hard negative mining.
    batch_conf = conf_data.view(
        -1, num_classes)  # (num_batch*num_box, num_classes)
    loss_conf = F.cross_entropy(batch_conf,
                                conf_t_label.view(-1),
                                reduction="none")

    # Hard negative mining: keep only the highest-loss negatives.
    num_pos = pos_mask.long().sum(1, keepdim=True)
    loss_conf = loss_conf.view(num_batch, -1)  # (num_batch, 8732)
    _, loss_idx = loss_conf.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    # idx_rank gives, for each box, the rank of its loss within its image.
    num_neg = torch.clamp(num_pos * self.neg_pos, max=num_dbox)
    neg_mask = idx_rank < (num_neg).expand_as(idx_rank)

    # (num_batch, 8732) -> (num_batch, 8732, num_classes)
    pos_idx_mask = pos_mask.unsqueeze(2).expand_as(conf_data)
    neg_idx_mask = neg_mask.unsqueeze(2).expand_as(conf_data)

    # Final confidence loss over positives plus mined negatives.
    conf_t_pre = conf_data[(pos_idx_mask + neg_idx_mask).gt(0)].view(
        -1, num_classes)
    conf_t_label_ = conf_t_label[(pos_mask + neg_mask).gt(0)]
    loss_conf = F.cross_entropy(conf_t_pre, conf_t_label_, reduction="sum")

    # total loss = loss_loc + loss_conf, each normalized by positive count.
    # NOTE(review): N can be 0 if a batch has no positives — confirm the
    # data pipeline guarantees at least one match per batch.
    N = num_pos.sum()
    loss_loc = loss_loc / N
    loss_conf = loss_conf / N

    return loss_loc, loss_conf
def forward(self, predictions, targets, use_arm=False, filter_object=False, debug=False):
    """Multibox Loss for a RefineDet-style two-step detector.

    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)
        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
        use_arm: when True, predictions also carry the ARM branch outputs
            and matching is refined against the ARM locations.
        filter_object: when True (with use_arm), positives whose ARM
            objectness score is below self.object_score are suppressed.
        debug: print positive-count diagnostics.

    Returns:
        (loss_l, loss_c, loss_l_repul) when not use_arm, else (loss_l, loss_c).
    """
    if use_arm:
        arm_loc_data, arm_conf_data, loc_data, conf_data, priors = predictions
    else:
        loc_data, conf_data, _, _, priors = predictions
    num = loc_data.size(0)
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    loc_g = torch.Tensor(num, num_priors, 4)
    defaults = priors.data
    for idx in range(num):
        predicts = loc_data[idx].data
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        # binary (object vs background) mode collapses all labels to 0/1
        if self.num_classes == 2:
            labels = labels > 0
        if use_arm:
            bbox_weight = refine_match(
                self.threshold, truths, defaults, self.variance, labels,
                loc_t, conf_t, idx, arm_loc_data[idx].data,
                use_weight=False)
        else:
            # also fills loc_g (ground-truth boxes) for the repulsion loss
            match(self.threshold, predicts, truths, defaults,
                  self.variance, labels, loc_t, loc_g, conf_t, idx)
    loc_t = loc_t.cuda()
    loc_g = loc_g.cuda()
    conf_t = conf_t.cuda()
    # wrap targets (no gradient flows through them)
    loc_t = Variable(loc_t, requires_grad=False)
    loc_g = Variable(loc_g, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    if use_arm and filter_object:
        # drop positives the ARM branch scores as unlikely objects
        P = F.softmax(arm_conf_data, 2)
        arm_conf_data_temp = P[:, :, 1]
        object_score_index = arm_conf_data_temp <= self.object_score
        pos = conf_t > 0
        pos[object_score_index.detach()] = 0
    else:
        pos = conf_t > 0
    num_pos = pos.sum(1, keepdim=True)

    if debug:
        if use_arm:
            print("odm pos num: ", str(loc_t.size(0)),
                  str(loc_t.size(1)))
        else:
            print("arm pos num", str(loc_t.size(0)), str(loc_t.size(1)))

    if self.OHEM:
        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        if num_pos.data.sum() > 0:
            num_neg = torch.clamp(
                self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        else:
            # no positives in the batch: fall back to a fixed negative
            # budget so mining still produces a learning signal
            fake_num_pos = torch.ones(32, 1).long() * 15
            num_neg = torch.clamp(
                self.negpos_ratio * fake_num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(
            conf_p, targets_weighted, size_average=False)
    else:
        # NOTE(review): conf_p is only assigned inside the OHEM branch, so
        # this path raises NameError if ever taken (size_average is also
        # deprecated) — confirm whether non-OHEM mode is actually used.
        loss_c = F.cross_entropy(conf_p, conf_t, size_average=False)

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    if num_pos.data.sum() > 0:
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
        if not use_arm:
            # repulsion term: push predictions away from non-target GT boxes
            loc_g = loc_g[pos_idx].view(-1, 4)
            priors = priors.expand_as(pos_idx)
            priors = priors[pos_idx].view(-1, 4)
            repul_loss = RepulsionLoss(sigma=0., variance=self.variance)
            loss_l_repul = repul_loss(loc_p, loc_g, priors)
        N = num_pos.data.sum()
    else:
        loss_l = torch.zeros(1)
        N = 1.0
    loss_l /= float(N)
    loss_c /= float(N)
    if not use_arm:
        loss_l_repul /= float(N)
        return loss_l, loss_c, loss_l_repul
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
    """
    loc_data, conf_data = predictions
    # loc_data: (batch, num_priors, 4); conf_data: (batch, num_priors, 2)
    priors = priors
    # priors: (num_priors, 4) anchor boxes in normalized coordinates
    num = loc_data.size(0)  # batch size
    num_priors = (priors.size(0)
                  )  # total number of anchors
    # match priors (default boxes) and ground truth boxes;
    # loc_t / conf_t are filled in place by match() per image.
    loc_t = torch.Tensor(num, num_priors,
                         4)  # loc_t: (num, num_priors, 4)
    conf_t = torch.LongTensor(
        num, num_priors)  # conf_t: (num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        # match() assigns each prior the label/offset of its best GT box;
        # priors with overlap below self.threshold get label 0 (background).
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # conf_t is mostly zeros: only matched priors are positive.
    pos = conf_t > 0  # (num, num_priors) boolean mask, mostly False

    # Localization Loss (Smooth L1), positives only.
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(
        loc_data)  # (num, num_priors, 4)
    loc_p = loc_data[pos_idx].view(-1, 4)
    # loc_p: (num_positive, 4) predicted offsets of the positive priors
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Classification loss with hard negative mining.
    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    # batch_conf: (num*num_priors, num_classes)
    # log_sum_exp(x) - x[target] is the per-prior cross-entropy of the
    # true class, used only to rank negatives.
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, targets):
    """Compute the SSD MultiBox loss.

    Args:
        predictions (tuple): (loc_data, conf_data, priors) from the net.
            loc_data:  (batch, num_priors, 4) box regression predictions.
            conf_data: (batch, num_priors, num_classes) class scores.
            priors:    (num_priors, 4) default boxes.
        targets (list): per-image ground truth, each (num_objs, 5) with
            the class label in the last column.

    Returns:
        (loss_l, loss_c): localization and confidence losses, each
        normalized by the number of positive matches.
    """
    loc_data, conf_data, priors = predictions
    # get batch_size
    num = loc_data.size(0)
    # get all default boxes
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))

    # target buffers filled in place by match()
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data  # ground-truth boxes
        labels = targets[idx][:, -1].data   # ground-truth labels
        defaults = priors.data
        # assign each ground-truth box to default boxes for image idx
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets (no gradient flows through them)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    # priors with conf_t > 0 were matched to an object
    pos = conf_t > 0
    num_pos = pos.sum(dim=1, keepdim=True)

    # Localization loss (Smooth L1) over positive priors only.
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # reduction='sum' replaces the deprecated size_average=False
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    batch_conf = conf_data.view(-1, self.num_classes)
    # per-prior cross-entropy of the true class (softmax form), used
    # only to rank negatives for hard negative mining
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    loss_c = loss_c.view(num, -1)
    loss_c[pos] = 0  # positives are excluded from negative mining

    # Hard negative mining: keep the negpos_ratio highest-loss negatives
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives plus mined negatives
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Normalize by the number of positives; clamp to 1 so a batch with
    # no positive matches does not divide by zero.
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """RetinaFace MultiBox loss: box regression + face classification +
    facial-landmark regression, with hard negative mining."""
    #--------------------------------------------------------------------#
    # Unpack predictions: box regression, class confidence, landmarks.
    #--------------------------------------------------------------------#
    loc_data, conf_data, landm_data = predictions
    #--------------------------------------------------#
    # Batch size and number of prior boxes.
    #--------------------------------------------------#
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    #--------------------------------------------------#
    # Target buffers filled in place by match().
    #--------------------------------------------------#
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        # ground-truth boxes, labels and landmarks for image idx
        truths = targets[idx][:, :4].data
        labels = targets[idx][:, -1].data
        landms = targets[idx][:, 4:14].data
        # prior (default) boxes
        defaults = priors.data
        #--------------------------------------------------#
        # Match ground-truth boxes against priors: a prior with
        # sufficient overlap becomes responsible for detecting
        # that ground-truth box.
        #--------------------------------------------------#
        match(self.threshold, truths, defaults, self.variance, labels,
              landms, loc_t, conf_t, landm_t, idx)
    #--------------------------------------------------#
    # loc_t   (num, num_priors, 4)
    # conf_t  (num, num_priors)
    # landm_t (num, num_priors, 10)
    #--------------------------------------------------#
    zeros = torch.tensor(0)
    if self.cuda:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        landm_t = landm_t.cuda()
        zeros = zeros.cuda()

    #------------------------------------------------------------------------#
    # Faces WITH annotated landmarks are labeled 1; faces WITHOUT landmark
    # annotations are labeled -1.  Therefore the landmark loss uses
    # pos1 = conf_t > zeros, while the box loss uses pos = conf_t != zeros.
    #------------------------------------------------------------------------#
    pos1 = conf_t > zeros
    pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
    landm_p = landm_data[pos_idx1].view(-1, 10)
    landm_t = landm_t[pos_idx1].view(-1, 10)
    loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

    pos = conf_t != zeros
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    #--------------------------------------------------#
    # batch_conf (num * num_priors, 2)
    # loss_c     (num, num_priors)
    #--------------------------------------------------#
    conf_t[pos] = 1  # collapse -1/1 labels to a single "face" class
    batch_conf = conf_data.view(-1, self.num_classes)
    # rank priors by how hard they are to classify
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    # only hard *negative* samples are mined; positives are zeroed out
    loss_c[pos.view(-1, 1)] = 0
    loss_c = loss_c.view(num, -1)
    #--------------------------------------------------#
    # loss_idx (num, num_priors)
    # idx_rank (num, num_priors)
    #--------------------------------------------------#
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    #--------------------------------------------------#
    # Positive count per image.
    # num_pos (num, 1);  neg (num, num_priors)
    #--------------------------------------------------#
    num_pos = pos.long().sum(1, keepdim=True)
    # cap the number of mined negatives
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    #--------------------------------------------------#
    # Expand masks to class dimension.
    # pos_idx (num, num_priors, num_classes)
    # neg_idx (num, num_priors, num_classes)
    #--------------------------------------------------#
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    # select the positives and mined negatives that contribute to the loss
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    num_pos_landm = pos1.long().sum(1, keepdim=True)
    N1 = max(num_pos_landm.data.sum().float(), 1)
    loss_landm /= N1
    return loss_l, loss_c, loss_landm
def forward(self, predictions, targets):
    """SSD MultiBox loss: Smooth-L1 box regression on positive priors
    plus cross-entropy classification with hard negative mining.

    Args:
        predictions: tuple (loc_data, conf_data, priors).
        targets: per-image tensors (num_objs, 5), label in last column.

    Returns:
        (loss_l, loss_c), each normalized by the positive count.
    """
    # box regression, class confidences, prior (default) boxes
    loc_data, conf_data, priors = predictions
    num = loc_data.size(0)  # batch size
    # keep one prior row per prediction row
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))
    # target buffers, filled in place by match()
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        priors = priors.cuda()
    for idx in range(num):
        truths = targets[idx][:, :-1]  # ground-truth boxes
        labels = targets[idx][:, -1]   # ground-truth labels
        defaults = priors
        # assign each ground-truth box to priors; writes loc_t / conf_t
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    # wrap targets (no gradient flows through them)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    # priors with conf_t > 0 were matched to an object
    pos = conf_t > 0
    # number of positives per image
    num_pos = pos.sum(dim=1, keepdim=True)

    # localization loss (Smooth L1) on positives only
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # reduction='sum' replaces the deprecated size_average=False
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    batch_conf = conf_data.view(-1, self.num_classes)
    # per-prior cross-entropy of the true class, used to rank negatives
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    loss_c = loss_c.view(num, -1)
    loss_c[pos] = 0  # exclude positives from negative mining

    # hard negative mining: take the highest-loss negatives per image
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    # cap negatives at negpos_ratio x positives
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # confidence loss over positives plus mined negatives
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # normalize by positive count; clamp to 1 to avoid division by zero
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
    """
    loc_data, conf_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes;
    # loc_t / conf_t are filled in place by match().
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:,:-1].data
        labels = targets[idx][:,-1].data
        defaults = priors.data
        match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets (no gradient flows through them)
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t,requires_grad=False)

    pos = conf_t > 0
    num_pos = pos.sum(dim=1, keepdim=True)

    # Localization Loss (Smooth L1), positives only
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1,4)
    loc_t = loc_t[pos_idx].view(-1,4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # NOTE(review): a large commented-out experimental GIoU localization
    # loss and a commented-out focal-loss classification variant were
    # removed from this spot; recover them from version control if needed.

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1,self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1))

    # Hard Negative Mining
    loss_c[pos.view(-1,1)] = 0 # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _,loss_idx = loss_c.sort(1, descending=True)
    _,idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1,keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
    targets_weighted = conf_t[(pos+neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted,reduction='sum') #CATEGORICAL CROSS ENTROPY
    # loss_c = self.f_loss(conf_p,targets_weighted) #Focal Loss

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l/=N
    loss_c/=N
    return loss_l,loss_c
def forward(self, predictions, priors, targets):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
    """
    loc_data, conf_data, landm_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))

    # match priors (default boxes) and ground truth boxes;
    # loc_t / conf_t / landm_t are filled in place by match().
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :4].data    # ground-truth boxes
        labels = targets[idx][:, -1].data    # ground-truth labels
        landms = targets[idx][:, 4:14].data  # ground-truth landmarks
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              landms, loc_t, conf_t, landm_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        landm_t = landm_t.cuda()

    zeros = torch.tensor(0).cuda()
    # landm Loss (Smooth L1)
    # Shape: [batch,num_priors,10]
    # Only priors with label > 0 carry landmark supervision
    # (label -1 marks faces without landmark annotations).
    pos1 = conf_t > zeros
    num_pos_landm = pos1.long().sum(1, keepdim=True)
    N1 = max(num_pos_landm.data.sum().float(), 1)
    pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
    landm_p = landm_data[pos_idx1].view(-1, 10)
    landm_t = landm_t[pos_idx1].view(-1, 10)
    loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

    # Any non-zero label (1 or -1) is a face for box/classification loss.
    pos = conf_t != zeros
    conf_t[pos] = 1  # collapse to a single "face" class

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    loss_landm /= N1

    return loss_l, loss_c, loss_landm
def forward(self, odm_data, priors, loc_targets, cls_targets,
            arm_data=None, filter_object=False):
    """Multibox Loss for the ODM branch of a RefineDet-style detector.

    Args:
        odm_data (tuple): (loc preds, conf preds) from the ODM.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
        priors: (num_priors, 4) default boxes.
        loc_targets / cls_targets: per-image ground-truth boxes / labels
            (labels are shifted by +1 so background is 0).
        arm_data (tuple): optional ARM branch (arm_loc, arm_conf);
            when given, matching is refined against the ARM locations.
        filter_object: when True, positives whose ARM objectness score
            is <= self.object_score are suppressed.

    Returns:
        (loss_l, loss_c), each normalized by the positive count.
    """
    loc_data, conf_data = odm_data
    if arm_data:
        arm_loc, arm_conf = arm_data
    num = loc_data.size(0)
    num_priors = (priors.size(0))

    # match priors (default boxes) and ground truth boxes;
    # loc_t / conf_t are filled in place per image
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.Tensor(num, num_priors)
    for idx in range(num):
        truths = loc_targets[idx]
        labels = cls_targets[idx] + 1  # background as 0
        truths = truths.to(self.opt.device)
        labels = labels.to(self.opt.device)
        # binary object-vs-background mode
        if self.num_classes == 2:
            labels = labels > 0
        if arm_data:
            refine_match(self.threshold, truths, priors, self.variance,
                         labels, loc_t, conf_t, idx, arm_loc[idx])
        else:
            match(self.threshold, truths, priors, self.variance, labels,
                  loc_t, conf_t, idx)

    if arm_data and filter_object:
        # suppress positives the ARM branch scores as unlikely objects
        arm_conf_data = arm_conf[:, :, 1]
        pos = conf_t > 0
        object_score_index = arm_conf_data <= self.object_score
        pos[object_score_index] = 0
    else:
        pos = conf_t > 0

    # Localization Loss (Smooth L1) on positive priors only.
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loc_t = loc_t.detach()  # targets carry no gradient
    # reduction='sum' replaces the deprecated size_average=False
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior classification loss used to rank negatives for mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1).long())

    # Hard Negative Mining.
    # BUGFIX: the original wrote loss_c[pos.view(-1).long()] = 0, which
    # treats the 0/1 mask as *integer indices* and only zeroes rows 0
    # and 1; a boolean mask is required to filter out every positive box.
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1).detach()
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted.long(),
                             reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N.
    # Clamp N so a batch with no positives does not divide by zero.
    N = max(num_pos.sum().item(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets, using_gpu):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size,num_priors,num_classes)
            loc shape: torch.size(batch_size,num_priors,4)
            priors shape: torch.size(num_priors,4)

        ground_truth (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size,num_objs,5] (last idx is the label).
    """
    loc_data, conf_data, landm_data = predictions
    priors = priors
    num = loc_data.size(0)  # num = batch_size
    num_priors = (priors.size(0))

    # match priors (default boxes) and ground truth boxes;
    # loc_t / conf_t / landm_t are filled in place by match().
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :4].data    # [num_objs, 4]
        labels = targets[idx][:, -1].data    # [num_objs]
        landms = targets[idx][:, 4:14].data  # [num_objs, 10]
        defaults = priors.data
        # key step: match prior boxes against ground-truth boxes
        match(self.threshold, truths, defaults, self.variance, labels,
              landms, loc_t, conf_t, landm_t, idx)
    zeros = torch.tensor(0)
    if using_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        landm_t = landm_t.cuda()
        zeros = zeros.cuda()

    # landm Loss (Smooth L1)
    # Shape: [batch,num_priors,10]
    pos1 = conf_t > zeros  # priors with label > 0 (most labels are 0)
    # number of priors carrying landmark supervision
    num_pos_landm = pos1.long().sum(1, keepdim=True)
    N1 = max(num_pos_landm.data.sum().float(), 1)
    pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
    landm_p = landm_data[pos_idx1].view(-1, 10)
    landm_t = landm_t[pos_idx1].view(-1, 10)
    loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

    pos = conf_t != zeros
    conf_t[pos] = 1  # collapse non-zero labels to a single "face" class

    # Localization Loss (Smooth L1)
    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    # conf_t: [batch, num_priors]
    # loss_c: [batch*num_priors, 1] per-prior classification loss
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining: sort by loss and keep only the hardest
    # negatives; positive positions (pos) are zeroed out first.
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    # reshape loss_c from [batch*num_priors, 1] to [batch, num_priors]
    loss_c = loss_c.view(num, -1)
    # sort descending, then recover each prior's loss rank
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    # num_pos: [batch, 1], number of positives per image
    num_pos = pos.long().sum(1, keepdim=True)
    # negatives budget = negpos_ratio x positives, capped
    num_neg = torch.clamp(self.negpos_ratio * num_pos,
                          max=pos.size(1) - 1)
    # negatives = priors whose loss rank falls inside the budget
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    # gather the predictions selected by pos_idx / neg_idx
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
        -1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    loss_landm /= N1
    return loss_l, loss_c, loss_landm
def forward(self, predicts, targets):
    """SSD multibox loss (localization + confidence with hard negative mining).

    Args:
        predicts (tuple): (loc preds, conf preds, priors) from the net.
            loc shape: torch.size(batch_size, 8732, 4)
            conf shape: torch.size(batch_size, 8732, num_classes)
            priors shape: torch.size(8732, 4)
        targets: per-image ground truth rows of [box(4), label].

    Returns:
        LossTuple(loss_l, loss_c, total_loss), each normalized by the
        number of positive priors.
    """
    loc_data, conf_data, priors = predicts
    num = loc_data.size(0)  # batch size
    # Keep priors aligned with the prediction tensors (normally a no-op).
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))

    # match() fills loc_t / conf_t in place for each image: every prior
    # gets its matched regression target and class label.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    loc_t = loc_t.cuda()
    conf_t = conf_t.cuda()
    priors = priors.cuda()
    for idx in range(num):
        truths = targets[idx][:, :-1]  # gt boxes for this image
        labels = targets[idx][:, -1]   # gt labels for this image
        defaults = priors
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    # No gradients flow into the matched targets.
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    # conf_t > 0 marks priors matched to an object (positives).
    pos = conf_t > 0
    num_pos = pos.sum(dim=1, keepdim=True)

    # Localization loss (Smooth L1) on positives only.
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # FIX: size_average=False is deprecated and removed in current PyTorch;
    # reduction='sum' is the equivalent modern spelling.
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior softmax loss, used only to rank negatives for mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    loss_c = loss_c.view(num, -1)

    # Hard negative mining: ignore positives, double argsort gives each
    # prior's rank in descending-loss order.
    loss_c[pos] = 0
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    # Cap negatives at negpos_ratio per positive.
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives plus mined negatives.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    # FIX: size_average=False -> reduction='sum' (deprecated API removed).
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # FIX: guard against a batch with zero positives, which previously
    # divided by zero and produced NaN losses.
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    total_loss = loss_l + loss_c
    losses = [loss_l, loss_c, total_loss]
    return LossTuple(*losses)
def forward(self, predictions, priors, targets):
    """Multibox loss with a separate objectness head and per-box weights.

    Args:
        predictions (tuple): (loc preds, conf preds, objectness preds).
            loc shape: torch.size(batch_size, num_priors, 4)
            conf shape: torch.size(batch_size, num_priors, num_classes)
            obj shape: torch.size(batch_size, num_priors, 2)
        priors: prior boxes, shape torch.size(num_priors, 4).
        targets: per-image ground truth; leading columns are the box, the
            last two columns feed conf_t's two channels.

    Returns:
        dict with 'loss_box_reg', 'loss_cls' and 'loss_obj'.
    """
    loc_data, conf_data, obj_data = predictions
    device = loc_data.device
    targets = [anno.to(device) for anno in targets]
    num = loc_data.size(0)
    num_priors = priors.size(0)

    # match priors (default boxes) and ground truth boxes; match() fills
    # loc_t / conf_t / obj_t in place. conf_t has two channels — channel 0
    # is used below as the class label and channel 1 as a per-box weight;
    # assumed from usage here — confirm against match().
    loc_t = torch.Tensor(num, num_priors, 4).to(device)
    conf_t = torch.Tensor(num, num_priors, 2).to(device)
    obj_t = torch.BoolTensor(num, num_priors).to(device)
    for idx in range(num):  # batch_size
        truths = targets[idx][:, :-2].data  # [obj_num, 4]
        labels = targets[idx][:, -2:].data  # [obj_num, 2]
        defaults = priors.data              # [num_priors, 4]
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, obj_t, idx)

    pos = (conf_t[:, :, 0] > 0).bool()  # [num, num_priors]
    # Weighted positive count: sum of per-box weights over positives.
    num_pos = (conf_t[:, :, 1] * pos.float()).sum(1, keepdim=True).long()

    # Localization Loss (Smooth L1), weighted per positive box.
    # Shape: [batch, num_priors, 4]
    loc_p = loc_data[pos]
    loc_t = loc_t[pos]
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='none')
    weight_pos = conf_t[pos][:, 1]
    loss_l = torch.sum(torch.sum(loss_l, dim=1) * weight_pos)

    # Objectness loss across the batch, used only to rank negatives —
    # no gradients needed for the ranking pass.
    with torch.no_grad():
        loss_obj = F.cross_entropy(obj_data.view(-1, 2),
                                   obj_t.long().view(-1),
                                   reduction='none')
        # Hard Negative Mining: drop pos boxes (label>0) and ignored
        # boxes (label=-1), then the double argsort yields each prior's
        # rank in descending-loss order.
        loss_obj[obj_t.view(-1)] = 0
        loss_obj = loss_obj.view(num, -1)
        _, loss_idx = loss_obj.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_neg = torch.clamp(self.negpos_ratio * num_pos,
                              max=num_priors - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)  # [num, num_priors]

    # Objectness loss over positives and mined negatives, weight-scaled.
    mask = pos | neg
    weight = conf_t[mask][:, 1]
    loss_obj = torch.sum(
        F.cross_entropy(obj_data[mask], obj_t[mask].long(),
                        reduction='none') * weight)

    # Classification loss: combine objectness and class logits so index 0
    # acts as background and indices 1..K-1 are the classes.
    batch_conf = conf_data.view(-1, self.num_classes - 1)
    batch_obj = obj_data.view(-1, 2)  # [num*num_priors, 2]
    logit_0 = batch_obj[:, 0].unsqueeze(1) + torch.log(
        torch.exp(batch_conf).sum(dim=1, keepdim=True))
    logit_k = batch_obj[:, 1].unsqueeze(1).expand_as(batch_conf) + batch_conf
    logit = torch.cat((logit_0, logit_k), 1)
    logit = logit.view(num, -1, self.num_classes)
    loss_c = torch.sum(
        F.cross_entropy(logit[mask], conf_t[mask][:, 0].long(),
                        reduction='none') * weight)

    # Normalize by the weighted positive count.
    # NOTE(review): N can be zero for a batch with no positives, which
    # would yield NaN/inf here — confirm callers never hit that case.
    N = num_pos.sum()
    loss_l /= N
    loss_c /= N
    loss_obj /= N
    return {
        'loss_box_reg': loss_l,
        'loss_cls': loss_c,
        'loss_obj': loss_obj
    }
def forward(self, predictions, priors, targets):
    """SSD multibox loss, computed in double precision.

    Args:
        predictions (tuple): (loc preds, conf preds) from SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape: torch.size(batch_size, num_priors, 4)
        priors: prior boxes, shape torch.size(num_priors, 4).
        targets: ground truth boxes and labels for a batch,
            shape [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (loss_l, loss_c) as double tensors, normalized by positive count.
    """
    loc_data, conf_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes; match() fills
    # loc_t / conf_t in place per image.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets — no gradients flow into the matched targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0

    # Localization Loss (Smooth L1), positives only. [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # FIX: size_average=False is deprecated/removed; reduction='sum' is
    # the equivalent modern spelling.
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior softmax loss used to rank negatives for mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining: reshape to [batch, num_priors] first so the
    # boolean mask lines up (see lzx1413/PytorchSSD issue #10).
    loss_c = loss_c.view(pos.size()[0], pos.size()[1])
    loss_c[pos] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    # FIX: size_average=False -> reduction='sum'.
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    # FIX: clamp N to 1 so a batch with no positives cannot divide by zero.
    N = max(num_pos.data.sum().double(), 1)
    loss_l = loss_l.double()
    loss_c = loss_c.double()
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """SSD multibox loss with optional mixup, GIoU regression, focal loss
    and label smoothing.

    Args:
        predictions (tuple): (loc preds, conf preds) from SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape: torch.size(batch_size, num_priors, 4)
        priors: prior boxes, shape torch.size(num_priors, 4).
        targets: per-image ground truth; 5 columns = [box, label],
            6 columns = [box, label, mixup weight].

    Returns:
        (loss_l, loss_c), each normalized by the number of positives.
    """
    loc_data, conf_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes. A 6th target
    # column signals mixup and carries a per-box loss weight.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    if targets[0].shape[1] == 6:  # mixup
        weight_t = torch.Tensor(num, num_priors)
    for idx in range(num):
        defaults = priors.data
        if targets[idx].shape[1] == 6:  # mixup
            truths = targets[idx][:, :-2].data
            labels = targets[idx][:, -2].data
            weight_loss = targets[idx][:, -1].data
            match_mixup(self.threshold, truths, defaults, self.variance,
                        labels, loc_t, conf_t, idx, weight_t, weight_loss,
                        self.giou)
        elif targets[idx].shape[1] == 5:  # no mixup
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx, self.giou)
        else:
            print('The shape of targets is error')
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets — no gradients flow into the matched targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0
    # True iff the targets carry a mixup-weight column.
    mix_up = (False, True)[targets[0].shape[1] == 6]
    pos_weight = None
    weights_conf = None

    # Localization Loss: either GIoU or (balanced / mixup-weighted) Smooth L1.
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    if self.giou:
        prior_giou = point_form(priors)  # [x,y,h,w] -> [x0,y0,x1,y1]
        prior_giou = prior_giou.unsqueeze(0).expand(num, num_priors, 4)
        prior_giou = prior_giou[pos_idx].view(-1, 4)
        reg_loss = GIoUloss()
        loss_l = reg_loss(loc_p, prior_giou, loc_t)
    else:
        if mix_up:
            weight_t = weight_t.cuda()
            weight_t = Variable(weight_t, requires_grad=False)
            pos_weight = weight_t[pos].view(-1, 1)
        reg_loss = SmoothL1_Mixup_Balance_loss(mixup=mix_up,
                                               balance=self.balance_l1,
                                               size_average=False)
        loss_l = reg_loss(loc_p, loc_t, pos_weight)

    # Confidence Loss
    if self.sigmoid_focal:
        # Original (sigmoid) focal loss over all priors — no mining.
        # If used, the test-time output in models/SSD.py must be sigmoid.
        batch_conf = conf_data.view(-1, self.num_classes)
        label_onehot = batch_conf.clone().zero_().scatter(
            1, conf_t.view(-1, 1), 1)
        # Per-class alpha balancing between foreground and background.
        alpha = self.alpha * label_onehot + (1 - self.alpha) * (
            1 - label_onehot)
        p = torch.sigmoid(batch_conf)
        pt = torch.where(label_onehot == 1, p, 1 - p)
        loss_c = -alpha * ((1 - pt)**self.gamma) * torch.log(pt)
        loss_c = loss_c.sum()
        num_pos = pos.long().sum(1, keepdim=True)
    else:
        # Softmax path with hard negative mining.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        # Hard Negative Mining: zero positives, double argsort -> rank.
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos,
                              max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)
        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        if self.label_smooth:
            # Build smoothed soft labels: matched class gets label_pos
            # (background rows get p+1), other classes share label_neg.
            p = conf_t.clone().view(-1, 1).float()
            lp = torch.where(p < 1, p + 1,
                             torch.tensor(self.label_pos).cuda())
            label = batch_conf.clone().zero_().scatter_(
                1, conf_t.view(-1, 1), lp)
            label[:, 1:][pos.clone().view(-1, 1).flatten()] += self.label_neg
            label_ohem = (pos + neg).view(-1, 1).expand_as(batch_conf)
            targets_weighted = label[label_ohem.gt(0)].view(
                -1, self.num_classes)
        else:
            targets_weighted = conf_t[(pos + neg).gt(0)]
        if mix_up:
            # Unmatched (weight 0) entries get weight 1.0 for the conf loss.
            weights_conf = weight_t[(pos + neg).gt(0)]
            weights_conf = torch.where(weights_conf > 0, weights_conf,
                                       weights_conf + 1.0).view(-1, 1)
        conf_loss = Crossentropy_Mixup_SoftmaxFocal_LableSmooth_loss(
            mixup=mix_up,
            focal_loss=self.softmax_focal,
            gamma=2.0,
            alpha=1.0,
            label_smooth=self.label_smooth,
            size_average=False)
        loss_c = conf_loss(conf_p, targets_weighted, weights_conf)

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """SSD multibox loss with IoU-aware matching and optional soft labels.

    Args:
        predictions (tuple): (loc preds, conf preds) from SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape: torch.size(batch_size, num_priors, 4)
        priors: prior boxes, shape torch.size(num_priors, 4).
        targets: ground truth boxes and labels for a batch,
            shape [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (loss_l, loss_c), each normalized by the number of positives.
    """
    loc_data, conf_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes. match() also
    # records the matched IoU per prior in `ious` (used for soft labels).
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    ious = torch.Tensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        match(self.threshold, self.size_range, self.iou_param,
              self.adapt_param, self.iou_type, truths, defaults,
              self.variance, labels, loc_t, conf_t, idx, ious)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        ious = ious.cuda()
    # wrap targets — no gradients flow into the matched targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0

    # Localization Loss (Smooth L1), positives only. [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # FIX: size_average=False is deprecated/removed; reduction='sum' is
    # equivalent and consistent with the cross_entropy calls below.
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior softmax loss used to rank negatives for mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    if self.soft_label:
        # Full gradient for negatives (weight 1), IoU-weighted gradient
        # for positives.
        ious[neg] = 1
        target_ious = ious[pos + neg]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='none')
        loss_c = torch.sum(loss_c * target_ious)
    else:
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self,
            odm_data,
            priors,
            targets,
            arm_data=None,
            filter_object=False):
    """RefineDet-style ODM multibox loss, optionally refined by ARM output.

    Args:
        odm_data (tuple): (loc preds, conf preds) from the ODM head.
        priors: prior boxes, [num_priors, 4].
        targets: per-image ground truth; last column is the label.
        arm_data (tuple | None): optional (arm_loc, arm_conf) from the ARM
            head; when given, matching is refined by arm_loc.
        filter_object (bool): when True (and arm_data is given), priors
            whose ARM objectness score is <= self.object_score are removed
            from the positive set.

    Returns:
        (loss_l, loss_c), each normalized by the number of positives.
    """
    loc_data, conf_data = odm_data
    if arm_data:
        arm_loc, arm_conf = arm_data
    priors = priors.detach()
    num = loc_data.size(0)
    num_priors = (priors.size(0))

    # match priors (default boxes) and ground truth boxes; the matchers
    # fill loc_t / conf_t in place per image.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].detach()
        labels = targets[idx][:, -1].detach()
        # For pure object detection collapse all classes to object(1)/bg(0).
        if self.num_classes == 2:
            labels = labels > 0
        if arm_data:
            refine_match(self.threshold, truths, priors, self.variance,
                         labels, loc_t, conf_t, idx, arm_loc[idx].detach())
        else:
            match(self.threshold, truths, priors, self.variance, labels,
                  loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()

    # Optionally drop positives whose ARM objectness is too low.
    if arm_data and filter_object:
        P = F.softmax(arm_conf, 2)
        arm_conf_tmp = P[:, :, 1]
        object_score_index = arm_conf_tmp <= self.object_score
        pos = conf_t > 0
        pos[object_score_index.detach()] = 0
    else:
        pos = conf_t > 0

    # Localization Loss (Smooth L1), positives only. [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior confidence loss; ignore_index=-1 skips ignored priors.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = F.cross_entropy(batch_conf,
                             conf_t.view(-1),
                             ignore_index=-1,
                             reduction='none')
    loss_c = loss_c.view(num, -1)

    # Hard Negative Mining: save positive losses, zero them out, then the
    # double argsort ranks the remaining (negative) priors by loss.
    pos_loss_c = loss_c[pos]
    loss_c[pos] = 0  # filter out pos boxes for now
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)
    neg_loss_c = loss_c[neg]

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    loss_c = pos_loss_c.sum() + neg_loss_c.sum()
    # FIX: clamp N to 1 — a batch with zero positives previously divided
    # by zero and produced NaN/inf losses.
    N = max(num_pos.data.sum().float(), 1)
    loss_l = loss_l / N
    loss_c = loss_c / N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """Multibox loss with a facial-landmark term (RetinaFace-style).

    Args:
        predictions (tuple): (loc preds, conf preds, landmark preds).
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape: torch.size(batch_size, num_priors, 4)
            landm shape: torch.size(batch_size, num_priors, 10)
        priors: prior boxes, shape torch.size(num_priors, 4).
        targets: per-image ground truth; columns are
            [x1, y1, x2, y2, 10 landmark coords, label].

    Returns:
        (loss_l, loss_c, loss_landm), each normalized by its positive count.
    """
    loc_data, conf_data, landm_data = predictions
    priors = priors
    num = loc_data.size(0)         # batch size
    num_priors = (priors.size(0))  # number of priors

    # match priors (default boxes) and ground truth boxes; match() fills
    # loc_t / conf_t / landm_t in place for each image.
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :4].data    # gt boxes
        labels = targets[idx][:, -1].data    # gt labels
        landms = targets[idx][:, 4:14].data  # gt landmarks
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              landms, loc_t, conf_t, landm_t, idx)
    # FIX: `zeros` was previously created only inside the GPU branch and
    # raised NameError whenever GPU was False. Create it unconditionally
    # and only move it to CUDA under the flag.
    zeros = torch.tensor(0)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        landm_t = landm_t.cuda()
        zeros = zeros.cuda()

    # landm Loss (Smooth L1) over strictly-positive labels.
    # Shape: [batch, num_priors, 10]
    pos1 = conf_t > zeros
    num_pos_landm = pos1.long().sum(1, keepdim=True)
    N1 = max(num_pos_landm.data.sum().float(), 1)  # guard divide-by-zero
    pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
    landm_p = landm_data[pos_idx1].view(-1, 10)
    landm_t = landm_t[pos_idx1].view(-1, 10)
    loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

    # Binary face/background task: collapse every matched label to 1.
    pos = conf_t != zeros
    conf_t[pos] = 1

    # Localization Loss (Smooth L1), positives only. [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Hard negative mining: rank background priors by their softmax loss
    # (larger loss = lower predicted background probability) and keep the
    # top ones so positives:negatives stays near 1:negpos_ratio.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    # Double argsort yields each prior's rank in descending-loss order.
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    loss_landm /= N1
    return loss_l, loss_c, loss_landm
def forward(self, predictions, priors, targets):
    """Standard SSD multibox loss.

    Args:
        predictions (tuple): (loc preds, conf preds) from SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape: torch.size(batch_size, num_priors, 4)
        priors: prior boxes, shape torch.size(num_priors, 4).
        targets: ground truth boxes and labels for a batch,
            shape [batch_size, num_objs, 5] (last idx is the label).

    Returns:
        (loss_l, loss_c), each normalized by the number of positives.
    """
    loc_data, conf_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # match priors (default boxes) and ground truth boxes; match() fills
    # loc_t / conf_t in place per image.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # wrap targets — no gradients flow into the matched targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0

    # Localization Loss (Smooth L1), positives only. [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # FIX: size_average=False is deprecated and removed in current
    # PyTorch; reduction='sum' is the equivalent modern spelling.
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior softmax loss used only to rank negatives for mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining: zero positives, then the double argsort
    # gives each prior's rank in descending-loss order.
    loss_c[pos.view(-1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    # FIX: size_average=False -> reduction='sum'.
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    # FIX: clamp N to 1 so a batch with no positives cannot divide by zero.
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
def forward(self, predictions, priors, targets):
    """Multibox loss with landmark and landmark-visibility terms.

    Args:
        predictions (tuple): (loc, conf, landmark, visibility) predictions.
            loc: [batch, num_priors, 4]; conf: [batch, num_priors, num_classes];
            landm: [batch, num_priors, 10]; visible: [batch, num_priors, 5].
        priors: prior boxes, [num_priors, 4].
        targets: per-image ground truth; columns are
            [x1, y1, x2, y2, 10 landmark coords, label(-7), angle(-6),
            5 visibility flags(last 5)].

    Returns:
        (loss_l, loss_c, loss_landm, loss_vis).
    """
    loc_data, conf_data, landm_data, visible_data = predictions
    priors = priors
    num = loc_data.size(0)
    num_priors = (priors.size(0))

    # match priors (default boxes) and ground truth boxes; match() fills
    # all *_t tensors in place for each image.
    loc_t = torch.Tensor(num, num_priors, 4)
    landm_t = torch.Tensor(num, num_priors, 10)
    conf_t = torch.LongTensor(num, num_priors)
    angle_t = torch.LongTensor(num, num_priors)
    visible_t = torch.Tensor(num, num_priors, 5)
    for idx in range(num):
        # Label layout includes an angle column (see docstring).
        truths = targets[idx][:, :4].data
        labels = targets[idx][:, -7].data
        landms = targets[idx][:, 4:14].data
        angles = targets[idx][:, -6].data
        visible = targets[idx][:, -5:].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              landms, loc_t, conf_t, landm_t, idx, angles, angle_t,
              visible, visible_t)
    # FIX: `zeros` and `ang_thr` were previously created only inside the
    # GPU branch, raising NameError when GPU is False. Create them
    # unconditionally and only move them to CUDA under the flag.
    zeros = torch.tensor(0)
    ang_thr = torch.tensor(60)
    if GPU:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
        landm_t = landm_t.cuda()
        angle_t = angle_t.cuda()
        visible_t = visible_t.cuda()
        zeros = zeros.cuda()
        ang_thr = ang_thr.cuda()

    pos1 = conf_t > zeros
    num_pos_landm = pos1.long().sum(1, keepdim=True)
    N1 = max(num_pos_landm.data.sum().float(), 1)
    pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
    # NOTE(review): mask_angle is currently unused below; the ang_*
    # expansions it previously fed were dead code and have been removed.
    mask_angle = angle_t > ang_thr

    # Landmark loss with hard example mining: only visible landmarks
    # (target coordinate != -1) contribute, and only the worst 50% of
    # positive rows (by mean masked loss) are summed.
    landm_p = landm_data[pos_idx1].view(-1, 10)
    landm_t = landm_t[pos_idx1].view(-1, 10)
    mask = (landm_t == -1)
    mask = torch.logical_not(mask)
    loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='none')
    loss_landm_mask = mask * loss_landm
    loss_landm_mask_mean = torch.mean(loss_landm_mask, -1)
    loss_landm_mask_sum = torch.sum(loss_landm_mask, -1)
    size = int(0.5 * loss_landm_mask.shape[0])
    _, topk_idx = torch.topk(loss_landm_mask_mean, k=size)
    loss_landm = torch.sum(loss_landm_mask_sum[topk_idx])
    N2 = size

    # Visibility loss: sigmoid + BCE over the 5 per-landmark flags.
    vis_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(visible_data)
    vis_p = visible_data[vis_idx1].view(-1, 5)
    vis_t = visible_t[vis_idx1].view(-1, 5)
    vis_p = torch.sigmoid(vis_p)
    criterions = nn.BCELoss(reduction='sum')
    loss_vis = criterions(vis_p, vis_t)

    # Binary face/background task: collapse every matched label to 1.
    pos = conf_t != zeros
    conf_t[pos] = 1

    # Localization Loss (Smooth L1), positives only. [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Per-prior softmax loss used to rank negatives for mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
        1, conf_t.view(-1, 1))

    # Hard Negative Mining
    loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
    loss_c = loss_c.view(num, -1)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Normalize each term by its own sample count.
    N = max(num_pos.data.sum().float(), 1)
    loss_l /= N
    loss_c /= N
    loss_landm /= N2
    loss_vis /= N1
    return loss_l, loss_c, loss_landm, loss_vis