def parse_annotation_jpeg(annotation_path, jpeg_path, gs):
    """
    Split the region proposals of one image into positive and negative samples.

    Positive samples: proposals whose IoU score is >= 0.5.
    Negative samples: proposals whose IoU score lies strictly between 0 and 0.5
    and whose area is larger than one fifth of the largest annotated box area
    (the area filter limits the number of negatives).

    Note carried over from the original docstring: annotated boxes flagged
    ``difficult`` are ignored -- presumably inside ``parse_xml``; confirm there.

    :param annotation_path: path of the annotation file handed to ``parse_xml``
    :param jpeg_path: path of the image read with ``cv2.imread``
    :param gs: selective-search object configured and queried for proposals
    :return: tuple ``(positive_list, negative_list)`` of proposal rectangles
    """
    image = cv2.imread(jpeg_path)

    selectivesearch.config(gs, image, strategy='q')
    # Candidate proposals.
    rects = selectivesearch.get_rects(gs)
    # Annotated bounding boxes.
    bndboxs = parse_xml(annotation_path)

    # Area of the largest annotated box (stays 0 when there are no boxes).
    largest_gt_area = 0
    for gt_xmin, gt_ymin, gt_xmax, gt_ymax in bndboxs:
        largest_gt_area = max(largest_gt_area,
                              (gt_ymax - gt_ymin) * (gt_xmax - gt_xmin))
    min_negative_area = largest_gt_area / 5.0

    # IoU between every proposal and the annotated boxes.
    iou_list = compute_ious(rects, bndboxs)

    positive_list = []
    negative_list = []
    for idx, iou_score in enumerate(iou_list):
        xmin, ymin, xmax, ymax = rects[idx]
        rect_area = (ymax - ymin) * (xmax - xmin)

        if iou_score >= 0.5:
            # Positive sample.
            positive_list.append(rects[idx])
        if 0 < iou_score < 0.5 and rect_area > min_negative_area:
            # Negative sample.
            negative_list.append(rects[idx])

    return positive_list, negative_list
def parse_annotation_jpeg(annotation_path, jpeg_path, gs):
    """
    Split the region proposals of one image into positive and negative samples.

    Positive samples: proposals whose IoU score is >= 0.5, followed by the
    annotated bounding boxes themselves.
    Negative samples: proposals whose IoU score is >= 0.1 and < 0.5.

    Note carried over from the original docstring: annotated boxes flagged
    ``difficult`` are ignored -- presumably inside ``parse_xml``; confirm there.

    :param annotation_path: path of the annotation file handed to ``parse_xml``
    :param jpeg_path: path of the image read with ``cv2.imread``
    :param gs: selective-search object configured and queried for proposals
    :return: tuple ``(positive_list, negative_list)`` of rectangles
    """
    img = cv2.imread(jpeg_path)

    selectivesearch.config(gs, img, strategy='q')
    # Candidate proposals.
    rects = selectivesearch.get_rects(gs)
    # Annotated bounding boxes.
    bndboxs = parse_xml(annotation_path)

    # IoU between every proposal and the annotated boxes.
    iou_list = compute_ious(rects, bndboxs)

    positive_list = []
    negative_list = []
    # The proposal area is not part of this variant's criteria, so it is no
    # longer computed per proposal.
    for idx, iou_score in enumerate(iou_list):
        if iou_score >= 0.5:
            # Positive sample.
            positive_list.append(rects[idx])
        elif 0.1 <= iou_score < 0.5:
            # Negative sample.
            negative_list.append(rects[idx])

    # The annotated boxes are positives as well.
    positive_list.extend(bndboxs)

    return positive_list, negative_list
def _anchors_target_level(self, anchors_level, gt_boxes, num_classes):
    """Build classification and box-regression targets for one anchor level.

    Args:
        anchors_level: anchor tensor whose shape unpacks as
            (height, width, num_anchors, _).
        gt_boxes: ground-truth rows; columns 0:4 are used as the box and
            column 4 as the class id. Rows are filtered by ``trim_zeros``.
        num_classes: size of the last axis of the returned label tensor.

    Returns:
        Tuple ``(labels, delta, label_weights, box_weights)``:
        * labels: (height, width, num_anchors, num_classes), 1 at the class
          of the best-matching gt box for every positive anchor, else 0.
        * delta: result of ``box2delta`` between each anchor and its
          best-matching gt box.
        * label_weights: (height, width, num_anchors), 1 for positive
          (max IoU >= 0.5) and negative (max IoU < 0.4) anchors, else 0.
        * box_weights: (height, width, num_anchors), 1 for positive anchors.
        When no valid gt box remains, every returned tensor is all zeros
        (``delta`` then has the shape of ``anchors_level``).
    """
    gt_boxes = tf.cast(gt_boxes, tf.float32)
    height, width, num_anchors, _ = tf.shape(anchors_level)
    grid_shape = (height, width, num_anchors)
    labels = tf.zeros(shape=(height, width, num_anchors, num_classes))

    # 1. Discard invalid gt boxes.
    valid_gt = trim_zeros(gt_boxes)
    if len(valid_gt) == 0:
        # Nothing to match against: all targets and weights stay zero.
        return (labels, tf.zeros_like(anchors_level),
                tf.zeros(shape=grid_shape), tf.zeros(shape=grid_shape))

    gt_coords = valid_gt[..., 0:4]
    ious = compute_ious(anchors_level, gt_coords)
    gt_class = tf.cast(valid_gt[..., 4], tf.int64)
    # Best IoU per anchor and the index of the gt box that achieves it.
    best_iou = tf.reduce_max(ious, axis=-1)
    best_gt = tf.argmax(ious, axis=-1)

    # 2. Anchors whose best IoU reaches the positive threshold are assigned
    #    to that ground truth. pos_index holds one grid index row per
    #    positive anchor.
    pos_index = tf.where(best_iou >= 0.5)
    matched_class = tf.gather(gt_class, tf.gather_nd(best_gt, pos_index))
    positive_class_index = tf.concat(
        [pos_index, tf.expand_dims(matched_class, axis=-1)], axis=-1)
    num_positive = tf.shape(positive_class_index)[0]
    positive_ones = tf.ones(shape=(num_positive, ))
    labels = tf.tensor_scatter_nd_update(labels, positive_class_index, positive_ones)

    # 3. Anchors whose best IoU is below the negative threshold are background.
    neg_index = tf.where(best_iou < 0.4)
    num_negative = tf.shape(neg_index)[0]

    # 4. Transform the matched boxes to deltas.
    flat_best_gt = tf.reshape(best_gt, (height * width * num_anchors, ))
    matched_boxes = tf.reshape(
        tf.gather(gt_coords, flat_best_gt), (height, width, num_anchors, 4))
    delta = box2delta(anchors_level, matched_boxes, self.target_means, self.target_stds)

    # 5. Label weights cover positives and negatives; box weights cover
    #    positives only.
    box_weights = tf.tensor_scatter_nd_update(
        tf.zeros(shape=grid_shape), pos_index, positive_ones)
    label_weights = tf.tensor_scatter_nd_update(
        tf.zeros(shape=grid_shape), pos_index, positive_ones)
    label_weights = tf.tensor_scatter_nd_update(
        label_weights, neg_index, tf.ones(shape=(num_negative, )))

    return labels, delta, label_weights, box_weights
def compute_loss(self, x, y, y_hat, step) -> (torch.Tensor, defaultdict, defaultdict):
    """Compute the instance-embedding loss for a batch plus logging summaries.

    Each scene of the batch is handled separately. For a scene, the total
    loss is ``gamma_inter * inter + gamma_intra * intra + gamma_reg * reg``:

    * inter: per instance, mean of ``relu(dist_to_instance_mean - delta_inter)``,
      combined across instances with weights ``num_points ** inter_chill``
      (normalised to sum to 1).
    * intra: hinge ``relu(2 * delta_intra - dist)`` over pairs of distinct
      instance means, divided by ``2 * n * (n - 1)`` with ``n = gt_insts.max()``;
      0 when ``n <= 1``.
    * reg: mean norm of the instance means.

    Scenes with no points, or whose instance ids sum to <= 0 (background
    only), are skipped.

    Args:
        x: object exposing ``.C``; column 0 of ``x.C`` is compared with the
            scene index (presumably sparse-tensor coordinates -- confirm).
        y: label tensor; column 1 is read as the instance id, 0 = background.
        y_hat: per-point embeddings, indexed with the same point indices.
        step: current step; during training the list summaries and the IoU
            debugging metrics are only produced when
            ``(step + 1) % summary_step == 0``.

    Returns:
        ``(loss, scalar_summaries, list_summaries)``. ``loss`` is the mean of
        the per-scene totals over the scenes that were not skipped.
        ``scalar_summaries`` maps keys to accumulated values divided by the
        configured batch size (not by the number of contributing scenes).

    Raises:
        NOTE(review): when every scene is skipped ``total_losses`` is empty
        and ``torch.stack`` is expected to raise ``RuntimeError``; this
        behaviour is unchanged.
    """
    loss_config = self.config['loss']
    total_losses = []
    scalar_summaries = defaultdict(float)
    list_summaries = defaultdict(list)
    batch_size = self.config['batch_size'] \
        if self.training else self.config['eval_batch_size']

    # Summary keys do not depend on the scene, so build the prefixes once.
    mode = 'train' if self.training else 'val'
    loss_prefix = 'loss/{}/'.format(mode)
    raw_prefix = loss_prefix + 'raw/'
    eff_prefix = loss_prefix + 'eff/'
    ratio_prefix = loss_prefix + 'ratio/'
    iou_prefix = 'iou/{}/'.format(mode)
    dist_prefix = 'dist/{}/'.format(mode)
    # Always during evaluation; every ``summary_step`` steps during training.
    emit_list_summaries = (not self.training) \
        or ((step + 1) % self.config['summary_step'] == 0)

    for batch_idx in range(batch_size):
        # Points belonging to this scene.
        scene_idxs = (x.C[:, 0] == batch_idx).nonzero().squeeze(dim=1)
        if scene_idxs.shape[0] <= 0:
            continue

        embs = y_hat[scene_idxs]
        gt_insts = y[scene_idxs, 1]
        # Background only: nothing to learn from this scene.
        if gt_insts.sum() <= 0.:
            continue
        num_insts = gt_insts.max()

        inst_emb_means = []
        inst_idxs = []
        inst_num_points = []
        inter_losses = []
        dist2mean = []

        # Distance of every instance point to its instance mean.
        for inst in range(1, num_insts + 1):
            single_inst_idxs = (gt_insts == inst)
            num_points = int(single_inst_idxs.sum())
            # Instance id not present in this scene.
            if num_points == 0:
                continue
            inst_idxs.append(single_inst_idxs)
            inst_num_points.append(num_points)
            inst_embs = embs[single_inst_idxs]  # Tensor of N x D
            inst_emb_mean = inst_embs.mean(dim=0)
            inst_emb_means.append(inst_emb_mean)

            inst_dists = torch.norm(
                inst_embs - inst_emb_mean.unsqueeze(dim=0), dim=1
            )  # Tensor of N
            inter_losses.append(
                torch.relu(inst_dists - loss_config['delta_inter']).mean())
            dist2mean.append(inst_dists)

        # inter loss
        inter_losses = torch.stack(inter_losses)
        # Weight each instance by (number of its points) ** p, 0 <= p <= 1:
        #   p == 0 -> every instance weighs the same,
        #   p == 1 -> every point weighs the same.
        # The count must be the number of True entries of the mask; the mask
        # length is the scene size and is identical for every instance, which
        # would make the weights uniform whatever p is.
        num_inst_points = torch.tensor(inst_num_points)
        inter_loss_weight = num_inst_points.float() \
            .pow(loss_config['inter_chill']).to(self.device)
        inter_loss_weight = inter_loss_weight / inter_loss_weight.sum()
        inter_loss = torch.dot(inter_losses, inter_loss_weight)

        # intra loss
        inst_emb_means = torch.stack(inst_emb_means, dim=0)
        pair_dist_mean = pairwise_distance(inst_emb_means)
        # relu guards against slightly negative values before the sqrt
        pair_dist_mean = torch.sqrt(torch.relu(pair_dist_mean))
        henge_dist_pair = torch.relu(2 * loss_config['delta_intra'] - pair_dist_mean)
        # Drop the diagonal (distance of a mean to itself).
        intra_loss = henge_dist_pair.sum() - torch.diag(henge_dist_pair).sum()
        # delete for memory efficiency
        del pair_dist_mean
        del henge_dist_pair
        if num_insts > 1:
            intra_loss = intra_loss / (2 * num_insts * (num_insts - 1))
        else:
            # A single instance has no pairs; avoid dividing by zero.
            intra_loss = torch.tensor(0.).to(self.device)

        # reg loss
        reg_loss = torch.norm(inst_emb_means, dim=1).mean()

        # Weighted sum of all the losses.
        eff_inter_loss = loss_config['gamma_inter'] * inter_loss
        eff_intra_loss = loss_config['gamma_intra'] * intra_loss
        eff_reg_loss = loss_config['gamma_reg'] * reg_loss
        total_loss = eff_inter_loss + eff_intra_loss + eff_reg_loss
        total_losses.append(total_loss)
        if torch.isnan(total_loss):
            # NOTE(review): interactive debugger left in the training path;
            # it blocks non-interactive runs. Kept to preserve behaviour.
            __import__('pdb').set_trace()

        # total loss
        scalar_summaries[loss_prefix + 'total'] += total_loss.item()
        # raw losses
        scalar_summaries[raw_prefix + 'inter_loss'] += inter_loss.item()
        scalar_summaries[raw_prefix + 'intra_loss'] += intra_loss.item()
        scalar_summaries[raw_prefix + 'reg_loss'] += reg_loss.item()
        # weighted (effective) losses
        scalar_summaries[eff_prefix + 'inter_loss'] += eff_inter_loss.item()
        scalar_summaries[eff_prefix + 'intra_loss'] += eff_intra_loss.item()
        scalar_summaries[eff_prefix + 'reg_loss'] += eff_reg_loss.item()
        # share of each effective loss in the total
        if total_loss.item() != 0:
            scalar_summaries[ratio_prefix + 'inter_loss'] \
                += eff_inter_loss.item() / total_loss.item()
            scalar_summaries[ratio_prefix + 'intra_loss'] \
                += eff_intra_loss.item() / total_loss.item()
            scalar_summaries[ratio_prefix + 'reg_loss'] \
                += eff_reg_loss.item() / total_loss.item()

        # Distance to the instance mean over all instance points.
        dist2mean = torch.cat(dist2mean)  # Tensor of shape N
        scalar_summaries[dist_prefix + 'dist_to_mean'] \
            += dist2mean.mean().item()
        # Only the points that still contribute to the inter loss. The mean
        # of an empty selection is NaN and would poison the summary, so an
        # empty selection contributes 0 instead.
        eff_dist2mean = dist2mean[dist2mean > loss_config['delta_inter']]
        scalar_summaries[dist_prefix + 'eff_dist_to_mean'] \
            += eff_dist2mean.mean().item() if eff_dist2mean.nelement() != 0 else 0.0

        # Distance of background points to the nearest instance mean.
        bg_embs = embs[gt_insts == 0]
        if bg_embs.nelement() != 0:
            bg_dist2emb_mean = pairwise_distance(bg_embs, inst_emb_means)
            bg_dist2emb_mean = bg_dist2emb_mean.min(dim=1).values
            scalar_summaries[dist_prefix + 'bg_dist_to_emb_mean'] \
                += bg_dist2emb_mean.mean().item()
            list_summaries[dist_prefix + 'bg_dist_to_emb_mean_hist'] \
                += bg_dist2emb_mean.cpu().tolist()

        if emit_list_summaries:
            list_summaries[loss_prefix + 'inter_loss_weight'] += inter_loss_weight.tolist()
            # Use the ground-truth means as seeds, for debugging.
            if num_insts > 1:
                inst_mean_seeds = [(inst_emb_means[i], i)
                                   for i in range(inst_emb_means.shape[0])]
                ious = compute_ious(inst_mean_seeds, embs, inst_idxs,
                                    self.config['emb_thres'])
                scalar_summaries[iou_prefix + 'mean_sample/mean'] += sum(ious) / float(len(ious))
                scalar_summaries[iou_prefix + 'mean_sample/max'] += max(ious)
                scalar_summaries[iou_prefix + 'mean_sample/min'] += min(ious)
                list_summaries[iou_prefix + 'mean_sample'] += ious

    scalar_summaries = {k: (float(v) / batch_size) for (k, v) in scalar_summaries.items()}
    loss = torch.stack(total_losses).mean()
    return loss, scalar_summaries, list_summaries