def gen_loss_target(self, output_maps, coordinate_target, delta_ratio=0.2):
    """Build the four per-cell target maps used by the detection loss.

    NOTICE: ``output_maps`` must already be transformed to real
    coordinates (0-512) before it is passed in.

    :param output_maps: net output, size(B, H, W, C); channels from index 4
        onward are read as the predicted corner coordinates
    :param coordinate_target: from fun: detection_target, size(B, H, W, 8)
    :param delta_ratio: trans w / h to delta in 2D-gauss
    :return: tuple ``(gaussian_score_target, distance_target, size_target,
        discrete_target)``
    """
    pred_corners = output_maps[..., 4:]

    # ---------------- [1] Gaussian score ----------------
    target_bbox = gtP.corner2bboxHW(coordinate_target)
    # Width / height of the target box, kept strictly positive so the
    # Gaussian deltas derived from them never reach zero.
    box_w = torch.clamp(
        (target_bbox[..., 2] - target_bbox[..., 0]).unsqueeze(-1), min=1e-6)
    box_h = torch.clamp(
        (target_bbox[..., 3] - target_bbox[..., 1]).unsqueeze(-1), min=1e-6)
    gaussian_score_target = self.gauss_2d(
        pred_corners[..., 0::2], pred_corners[..., 1::2],
        coordinate_target[..., 0::2], coordinate_target[..., 1::2],
        delta_ratio * box_w, delta_ratio * box_h)

    # ---------------- [2] Discrete target ----------------
    # Root of the mean squared deviation of the four per-corner scores
    # from their mean; the 1e-8 keeps the sqrt away from exactly zero.
    s0 = gaussian_score_target[..., 0]
    s1 = gaussian_score_target[..., 1]
    s2 = gaussian_score_target[..., 2]
    s3 = gaussian_score_target[..., 3]
    score_means = (s0 + s1 + s2 + s3) / 4
    squared_dev = ((s0 - score_means) ** 2 + (s1 - score_means) ** 2 +
                   (s2 - score_means) ** 2 + (s3 - score_means) ** 2)
    discrete_target = torch.sqrt(squared_dev / 4 + 1e-8)

    # ---------------- [3] Distance target ----------------
    out_bbox = gtP.corner2bboxHW(pred_corners)
    target_centers = gtP.calc_centers(coordinate_target)
    out_centers = gtP.calc_centers(pred_corners)
    distance_target = self.gen_distance_maps(
        out_centers, target_centers, out_bbox, target_bbox)

    # ---------------- [4] Size target ----------------
    def _ratio_angle(whwh, num, den):
        # arctan of the ratio between two channels of a whwh map
        return torch.atan(whwh[..., num] / whwh[..., den])

    whwh_out = self.gen_whwh_maps(pred_corners)
    whwh_target = self.gen_whwh_maps(coordinate_target)
    first_pair = (_ratio_angle(whwh_out, 0, 1) -
                  _ratio_angle(whwh_target, 0, 1)) ** 2
    second_pair = (_ratio_angle(whwh_out, 2, 3) -
                   _ratio_angle(whwh_target, 2, 3)) ** 2
    size_target = first_pair + second_pair
    # Scale the summed squared angle differences by 2 / pi^2.
    size_target = size_target * 2 / (math.pi ** 2)

    return gaussian_score_target, distance_target, size_target, discrete_target
def gen_loss_target(self, output_maps, coordinate_target, delta_ratio=0.2):
    """Compute the per-corner Gaussian score target map.

    NOTICE: ``output_maps`` must already be transformed to real
    coordinates (0-512) before it is passed in.

    :param output_maps: net output, size(B, H, W, C); channels from index 4
        onward are read as the predicted corner coordinates
    :param coordinate_target: from fun: detection_target, size(B, H, W, 8)
    :param delta_ratio: trans w / h to delta in 2D-gauss
    :return: gaussian_score_target, the result of ``self.gauss_2d``
    """
    pred_corners = output_maps[..., 4:]

    target_bbox = gtP.corner2bboxHW(coordinate_target)
    # Width / height of the target box, kept strictly positive so the
    # Gaussian deltas derived from them never reach zero.
    box_w = torch.clamp(
        (target_bbox[..., 2] - target_bbox[..., 0]).unsqueeze(-1), min=1e-6)
    box_h = torch.clamp(
        (target_bbox[..., 3] - target_bbox[..., 1]).unsqueeze(-1), min=1e-6)

    # Even channels are x coordinates, odd channels are y coordinates.
    return self.gauss_2d(
        pred_corners[..., 0::2], pred_corners[..., 1::2],
        coordinate_target[..., 0::2], coordinate_target[..., 1::2],
        delta_ratio * box_w, delta_ratio * box_h)
def nms_gauss(corners, scores, threshold=0.2, delta_ratio=0.2):
    """Greedy non-maximum suppression driven by the mean Gauss score.

    Repeatedly keeps the highest-scoring remaining candidate and discards
    every other candidate whose mean Gauss score against it exceeds
    ``threshold``.

    :param corners: corner coordinates, indexed as ``corners[idx, 0::2]`` /
        ``corners[idx, 1::2]`` for x / y (presumably size(N, 8))
    :param scores: one score per candidate, sorted along dim 0
    :param threshold: candidates with mean Gauss score <= threshold survive
    :param delta_ratio: trans w / h to delta in 2D-gauss
    :return: ``torch.LongTensor`` with the kept candidate indices
    """
    # Candidate indices, best score first.
    order = scores.sort(0, descending=True)[1]

    keep = []
    while order.numel() > 0:
        if order.numel() == 1:
            # Only one candidate left (possibly as a 0-dim tensor).
            keep.append(order.item())
            break

        best = order[0].item()
        keep.append(best)
        rest = order[1:]
        anchor = corners[best]

        # Gauss score of every remaining candidate against the kept one.
        anchor_bbox = gtP.corner2bboxSingle(anchor)  # size(4)
        box_w = torch.clamp(
            (anchor_bbox[2] - anchor_bbox[0]).unsqueeze(-1), min=1e-6)
        box_h = torch.clamp(
            (anchor_bbox[3] - anchor_bbox[1]).unsqueeze(-1), min=1e-6)
        gaussian_score = gauss_2d(
            corners[rest, 0::2], corners[rest, 1::2],
            anchor[0::2], anchor[1::2],
            delta_ratio * box_w, delta_ratio * box_h)  # size(N-1, 4)
        gaussian_score = torch.sum(gaussian_score, dim=-1) / 4

        # Positions are relative to ``rest`` (length N-1), not ``order``.
        survivors = (gaussian_score <= threshold).nonzero().squeeze()
        if survivors.numel() == 0:
            break
        order = rest[survivors]

    # Index tensors in PyTorch are LongTensor.
    return torch.LongTensor(keep)
def detection_target(output_maps, corners_list_512, stage_lvl=4):
    """Rasterise ground-truth corners into per-cell training targets.

    NOTICE: ``output_maps`` must already be transformed to real
    coordinates (0-512); only its shape and device are used here.

    :param output_maps: net output size(B, H, W, C)
    :param corners_list_512: list(B), tensor(N, 8), here B is batch_size,
        N is obj number in one image, 512 scale
    :param stage_lvl: which level the corners project to; corners are
        divided by ``2 ** stage_lvl`` to reach map scale
    :return: ``(sample_area_target, coordinate_target)`` where
        sample_area_target is a long map of size(B, H, W) holding 1 for
        effective cells, -1 for ignore cells and 0 elsewhere, and
        coordinate_target is a float map of size(B, H, W, 8) holding the
        512-scale corners of the object assigned to each effective cell
    """
    device = output_maps.device
    B, H, W, _ = output_maps.shape
    stride = 2 ** stage_lvl

    # Positive / negative area map and the matching gt coordinates.
    # The remaining targets (distance, size, discrete) are computed in the
    # loss function.
    sample_area_target = torch.zeros((B, H, W), dtype=torch.long,
                                     device=device)
    coordinate_target = torch.zeros((B, H, W, 8), dtype=torch.float,
                                    device=device)

    for img, corners_512 in enumerate(corners_list_512):
        corners_map = corners_512 / stride  # size(N, 8) at map scale

        for obj in range(corners_map.shape[0]):
            # Which scale bucket this object is distributed to.
            dist_idx = gtP.scale_distribute(
                corners_512[obj], tra_cfg.K_Means_args['split_value'])

            # Both maps are bool tensors of size(H, W).
            e_spatial_map, i_spatial_map = get_spatial_idx(
                corners_map[obj], W, H, dist_idx, device)
            effective = e_spatial_map == 1

            coordinate_target[img, effective] = corners_512[obj]

            # Drop from the new ignore map every cell that is already
            # non-background, so earlier labels are not overwritten by -1.
            occupied = sample_area_target[img] != 0
            i_spatial_map = i_spatial_map & ~occupied

            sample_area_target[img, effective] = 1
            sample_area_target[img, i_spatial_map == 1] = -1

    return sample_area_target, coordinate_target
def get_spatial_idx(corner_xy, W, H, scale_idx, device):
    """Build the effective and ignore cell masks for one object.

    :param corner_xy: torch size(8)
    :param W: detection map size
    :param H: as above
    :param scale_idx: corner size to distribute; selects the entry of the
        ``effective_ratio`` / ``ignore_ratio`` config lists
    :param device: kept for backward compatibility; as before, the masks
        are placed on ``corner_xy.device`` and this argument is not used
    :return: ``(e_spatial_idx, i_spatial_idx)``, two bool tensors of
        size(H, W); the ignore mask never overlaps the effective mask
    """
    device = corner_xy.device

    # Cell-centre coordinates (x + 0.5, y + 0.5) in row-major order, i.e.
    # flat index k corresponds to cell (y = k // W, x = k % W).  Built
    # directly on the target device and without torch.meshgrid, whose
    # call form without ``indexing=`` is deprecated.
    xs = torch.arange(0, W, device=device).float() + 0.5
    ys = torch.arange(0, H, device=device).float() + 0.5
    grid_center = torch.stack(
        [xs.repeat(H), ys.repeat_interleave(W)], dim=-1)  # size(H*W, 2)

    # Allocate the masks on the same device as the index masks computed
    # below: writing into a CPU tensor through accelerator-resident
    # indices raises in PyTorch.
    e_spatial_idx = torch.zeros((H, W), dtype=torch.bool, device=device)
    i_spatial_idx = torch.zeros((H, W), dtype=torch.bool, device=device)

    # Scaled versions of the object's corners for the two regions.
    effective_corner, ignore_corner = gtP.corner_scale_extend(
        corner_xy,
        tra_cfg.K_Means_args['effective_ratio'][scale_idx],
        tra_cfg.K_Means_args['ignore_ratio'][scale_idx])

    # effective
    eff_bool_grid = gtP.distinguish_point_pos(effective_corner, grid_center)
    eff_bool_grid = eff_bool_grid.reshape(H, W)
    e_spatial_idx[eff_bool_grid] = True

    # ignore
    ign_bool_grid = gtP.distinguish_point_pos(ignore_corner, grid_center)
    ign_bool_grid = ign_bool_grid.reshape(H, W)
    # Dilate the effective area so it always has a surrounding ignore band.
    dilate_bool_grid = gtP.dilate_3x3(eff_bool_grid)
    i_spatial_idx[dilate_bool_grid] = True
    i_spatial_idx[ign_bool_grid] = True
    # Effective cells are never ignore cells.
    i_spatial_idx[eff_bool_grid] = False

    return e_spatial_idx, i_spatial_idx
def gen_distance_maps(out_centers, target_centers, out_bbox, target_bbox):
    """Ratio of squared centre distance to squared enclosing-box diagonal.

    :param out_centers: size(B, H, W, 2)
    :param target_centers: size(B, H, W, 2)
    :param out_bbox: size(B, H, W, 4)
    :param target_bbox: size(B, H, W, 4)
    :return: distance map with the trailing coordinate axis reduced away
    """
    dx = out_centers[..., 0] - target_centers[..., 0]
    dy = out_centers[..., 1] - target_centers[..., 1]
    center_distance = dx ** 2 + dy ** 2

    # Feed the two boxes' corner points through corner2bboxHW together;
    # presumably this yields the box enclosing both -- confirm in gtP.
    union_bbox = gtP.corner2bboxHW(
        torch.cat([out_bbox, target_bbox], dim=-1))
    diag_w = union_bbox[..., 2] - union_bbox[..., 0]
    diag_h = union_bbox[..., 3] - union_bbox[..., 1]
    corner_distance = diag_w ** 2 + diag_h ** 2

    # Guard the division: a degenerate enclosing box (zero diagonal) would
    # otherwise produce inf / NaN that poisons any later reduction.  The
    # floor matches the 1e-6 clamp used for box sizes elsewhere in the file.
    corner_distance = torch.clamp(corner_distance, min=1e-6)

    return center_distance / corner_distance
def clac_gauss_score_eval(coordinate_out, coordinate_target, delta_ratio=0.2):
    """Mean Gauss score between one predicted and one target quadrilateral.

    :param coordinate_out: size(8)
    :param coordinate_target: same as above
    :param delta_ratio: trans w / h to delta in 2D-gauss
    :return: sum of the four per-corner Gauss scores divided by 4
    """
    bbox = gtP.corner2bboxSingle(coordinate_target)  # out size(4)

    # Target box width / height, floored so the Gaussian deltas stay > 0.
    box_w = torch.clamp((bbox[2] - bbox[0]).unsqueeze(-1), min=1e-6)  # size(1)
    box_h = torch.clamp((bbox[3] - bbox[1]).unsqueeze(-1), min=1e-6)  # size(1)

    # Even entries are x coordinates, odd entries are y coordinates.
    per_corner = gauss_2d(
        coordinate_out[0::2], coordinate_out[1::2],
        coordinate_target[0::2], coordinate_target[1::2],
        delta_ratio * box_w, delta_ratio * box_h)  # size(4)

    return torch.sum(per_corner) / 4
def clac_gauss_score_multi(coordinate_out, coordinate_target, delta_ratio=0.2):
    """Mean Gauss score of one predicted quadrilateral against N targets.

    :param coordinate_out: size(8)
    :param coordinate_target: size(N, 8)
    :param delta_ratio: trans w / h to delta in 2D-gauss
    :return: size(N), per-target sum of the four corner scores divided by 4
    """
    bboxes = gtP.corner2bboxMulti(coordinate_target)  # out size(N, 4)

    # Per-target box width / height, floored so the deltas stay > 0.
    box_w = torch.clamp(
        (bboxes[:, 2] - bboxes[:, 0]).unsqueeze(-1), min=1e-6)  # size(N, 1)
    box_h = torch.clamp(
        (bboxes[:, 3] - bboxes[:, 1]).unsqueeze(-1), min=1e-6)  # size(N, 1)

    # Even entries are x coordinates, odd entries are y coordinates.
    per_corner = gauss_2d(
        coordinate_out[0::2], coordinate_out[1::2],
        coordinate_target[:, 0::2], coordinate_target[:, 1::2],
        delta_ratio * box_w, delta_ratio * box_h)  # size(N, 4)

    return torch.sum(per_corner, dim=-1) / 4
def detection_analysis(outputs, top_k=50, threshold=0.5, out_size=512.0):
    """Select confident, non-degenerate detections from the network output.

    :param outputs: from detection network, size(B, H, W, C); the first 4
        channels are averaged into a score, the rest are reshaped to 8
        corner coordinates per cell
    :param top_k: select top k corners (by score) per image
    :param threshold: minimum averaged score for a candidate to be kept
    :param out_size: detection outputs size, to limit bound (0~512)
    :return: list of length B; each item is a dict with ``'score'``
        (tensor size(obj)) and ``'coord'`` (tensor size(obj, 8)), ordered by
        descending score
    """
    outputs = outputs.detach()
    B = outputs.shape[0]
    gauss_scores = (torch.sum(outputs[..., :4], dim=-1) / 4).reshape(
        B, -1)  # size(B, H*W)
    coord_outputs = outputs[..., 4:].reshape(B, -1, 8)  # size(B, H*W, 8)

    outputs_list = []
    for i in range(B):
        _, order = gauss_scores[i].sort(dim=0, descending=True)  # size(H*W)
        top = order[:top_k]
        gauss_score = gauss_scores[i, top]  # size(top_k)
        coord_output = coord_outputs[i, top]  # size(top_k, 8)

        # Require the bounding rectangle of each quadrilateral to have
        # non-zero width and height.
        bbox_output = gtP.corner2bboxHW(coord_output)  # size(top_k, 4)
        w_output = bbox_output[..., 2] - bbox_output[..., 0]  # size(top_k)
        h_output = bbox_output[..., 3] - bbox_output[..., 1]  # size(top_k)

        # One boolean mask for both filters.  Unlike chaining
        # ``nonzero().squeeze()`` results, mask indexing never collapses a
        # single surviving candidate to a 0-dim tensor, so the outputs are
        # always size(obj) and size(obj, 8), including obj == 0 or 1.
        keep = (w_output > 0) & (h_output > 0) & (gauss_score >= threshold)

        outputs_list.append({
            'score': gauss_score[keep],  # tensor size(obj)
            'coord': coord_output[keep].clamp(
                min=0.0, max=out_size)  # tensor size(obj, 8)
        })

    return outputs_list
gauss_target) # ===================== total loss ======================= coord_loss = smooth_L1_map coord_loss = torch.sum( coord_loss[sample_area_target == 1]) / torch.sum( sample_area_target == 1) score_loss = sample_focal_loss detection_loss = score_loss + coord_loss # print(detection_loss, coord_loss, score_loss) return detection_loss, coord_loss, score_loss if __name__ == '__main__': corners = torch.tensor( [[100.0, 100.0, 170.0, 120.0, 170.0, 135.0, 100.0, 115.0]]) distr_idx = gtP.scale_distribute( corners, splitValue=tra_cfg.K_Means_args['split_value']) distr_idx = distr_idx[0] print("idx", distr_idx) corners = corners / 16 print('corner / 16', corners) e, i = get_spatial_idx(corners, 32, 32, distr_idx, corners.device) with open('1.txt', 'w') as f: for y in range(e.shape[0]): for x in range(e.shape[1]): f.write(str(e[y][x].item())) f.write(' ') f.write('\n') f.write('\n=======================\n') for y in range(i.shape[0]): for x in range(i.shape[1]): f.write(str(i[y][x].item()))