def get_sampled_approxs(self, featmap_sizes, img_metas, cfg, device='cuda'):
    """Get sampled approxs and inside flags according to feature map sizes.

    Generates the approxs (anchors) of every feature level and, for every
    image, a per-position flag telling whether at least one approx at that
    position is inside the image.

    Args:
        featmap_sizes (list[tuple]): Multi-level feature map sizes.
        img_metas (list[dict]): Image meta info. ``pad_shape`` and
            ``img_shape`` are read from each entry.
        cfg: Config object; only ``cfg.allowed_border`` is read here.
        device (torch.device | str): device for returned tensors

    Returns:
        tuple: approxes of each image, inside flags of each image
    """
    num_imgs = len(img_metas)
    num_levels = len(featmap_sizes)

    # since feature map sizes of all images are the same, we only compute
    # approxes for one time
    multi_level_approxs = []
    for lvl in range(num_levels):
        # approxs of a single feature level; the original author notes the
        # shape as (N, 4)
        approxs = self.approx_generators[lvl].grid_anchors(
            featmap_sizes[lvl], self.anchor_strides[lvl], device=device)
        multi_level_approxs.append(approxs)
    # every image gets the same approxs, so the list object is shared
    # rather than copied
    approxs_list = [multi_level_approxs for _ in range(num_imgs)]

    # for each image, we compute inside flags of multi level approxes
    inside_flag_list = []
    for img_id, img_meta in enumerate(img_metas):
        multi_level_flags = []
        multi_level_approxs = approxs_list[img_id]
        for lvl in range(num_levels):
            approxs = multi_level_approxs[lvl]
            anchor_stride = self.anchor_strides[lvl]
            feat_h, feat_w = featmap_sizes[lvl]
            # slice instead of 3-way unpacking so that both (h, w) and
            # (h, w, c) pad shapes are accepted
            h, w = img_meta['pad_shape'][:2]
            # part of the feature map that is covered by the padded image
            valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)
            valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)
            flags = self.approx_generators[lvl].valid_flags(
                (feat_h, feat_w), (valid_feat_h, valid_feat_w),
                device=device)
            inside_flags_list = []
            # a dedicated index name: the original reused the level index
            # here, shadowing it inside the level loop
            for offset in range(self.approxs_per_octave):
                split_valid_flags = flags[offset::self.approxs_per_octave]
                split_approxs = approxs[offset::self.approxs_per_octave, :]
                inside_flags = anchor_inside_flags(
                    split_approxs, split_valid_flags,
                    img_meta['img_shape'][:2], cfg.allowed_border)
                inside_flags_list.append(inside_flags)
            # inside_flag for a position is true if any anchor in this
            # position is true
            inside_flags = (
                torch.stack(inside_flags_list, 0).sum(dim=0) > 0)
            multi_level_flags.append(inside_flags)
        inside_flag_list.append(multi_level_flags)
    return approxs_list, inside_flag_list
def get_sampled_approxs(self, featmap_sizes, img_metas, cfg, device="cuda"):
    """Build per-image approxs and per-position inside flags.

    Args:
        featmap_sizes (list[tuple]): Multi-level feature map sizes.
        img_metas (list[dict]): Image meta info; ``pad_shape`` and
            ``img_shape`` are read from each entry.
        cfg: Config object; only ``cfg.allowed_border`` is read here.
        device (torch.device | str): device for returned tensors

    Returns:
        tuple: approxes of each image, inside flags of each image
    """
    level_ids = range(len(featmap_sizes))

    # All images share the same feature map sizes, so the approxs are
    # generated once and the same list object is referenced per image.
    shared_approxs = [
        self.approx_generators[lvl].grid_anchors(
            featmap_sizes[lvl], self.anchor_strides[lvl], device=device)
        for lvl in level_ids
    ]
    approxs_list = [shared_approxs] * len(img_metas)

    inside_flag_list = []
    for img_meta in img_metas:
        per_level_flags = []
        for lvl in level_ids:
            lvl_approxs = shared_approxs[lvl]
            stride = self.anchor_strides[lvl]
            feat_h, feat_w = featmap_sizes[lvl]
            pad_h, pad_w = img_meta["pad_shape"][:2]
            valid_h = min(int(np.ceil(pad_h / stride)), feat_h)
            valid_w = min(int(np.ceil(pad_w / stride)), feat_w)
            valid = self.approx_generators[lvl].valid_flags(
                (feat_h, feat_w), (valid_h, valid_w), device=device)

            # One flag tensor per approx offset within a position.
            per_offset = [
                anchor_inside_flags(
                    lvl_approxs[k::self.approxs_per_octave, :],
                    valid[k::self.approxs_per_octave],
                    img_meta["img_shape"][:2],
                    cfg.allowed_border,
                )
                for k in range(self.approxs_per_octave)
            ]
            # A position counts as inside when any of its approxs is.
            stacked = torch.stack(per_offset, 0)
            per_level_flags.append(stacked.sum(dim=0) > 0)
        inside_flag_list.append(per_level_flags)

    return approxs_list, inside_flag_list
def get_sampled_approxs(self, featmap_sizes, img_metas, device='cuda'):
    """Build per-image approxs and per-position inside flags.

    Args:
        featmap_sizes (list[tuple]): Multi-level feature map sizes.
        img_metas (list[dict]): Image meta info; ``pad_shape`` and
            ``img_shape`` are read from each entry.
        device (torch.device | str): device for returned tensors

    Returns:
        tuple: approxes of each image, inside flags of each image
    """
    generator = self.approx_anchor_generator

    # Feature map sizes are identical for all images, so the approxs are
    # generated once and the same object is referenced for every image.
    shared_approxs = generator.grid_priors(featmap_sizes, device=device)
    approxs_list = [shared_approxs] * len(img_metas)

    inside_flag_list = []
    for img_meta in img_metas:
        # Valid flags of every approx, one entry per level.
        per_level_valid = generator.valid_flags(
            featmap_sizes, img_meta['pad_shape'], device=device)

        per_level_flags = []
        for lvl, valid in enumerate(per_level_valid):
            lvl_approxs = shared_approxs[lvl]
            per_offset = [
                anchor_inside_flags(
                    lvl_approxs[k::self.approxs_per_octave, :],
                    valid[k::self.approxs_per_octave],
                    img_meta['img_shape'][:2],
                    self.train_cfg.allowed_border)
                for k in range(self.approxs_per_octave)
            ]
            # A position counts as inside when any of its approxs is.
            stacked = torch.stack(per_offset, 0)
            per_level_flags.append(stacked.sum(dim=0) > 0)
        inside_flag_list.append(per_level_flags)

    return approxs_list, inside_flag_list
def _get_targets_single(self,
                        flat_anchors,
                        valid_flags,
                        gt_bboxes,
                        gt_bboxes_ignore,
                        gt_labels,
                        img_meta,
                        label_channels=1,
                        unmap_outputs=True):
    """Compute regression and classification targets for anchors in a
    single image.

    In addition to labels, weights and bbox targets, the index of the gt
    matched to every anchor (0-based) is collected; anchors without a
    matched gt keep -1 in ``pos_gt_inds``.

    Returns:
        tuple: ``(labels, label_weights, bbox_targets, bbox_weights,
        pos_inds, neg_inds, pos_gt_inds)``, or seven ``None`` values when
        no anchor is flagged as inside.
    """
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       self.train_cfg.allowed_border)
    if not inside_flags.any():
        return (None, ) * 7

    # Keep only the inside anchors, then assign and sample on them.
    anchors = flat_anchors[inside_flags.type(torch.bool), :]
    assigner_labels = None if self.sampling else gt_labels
    assign_result = self.assigner.assign(anchors, gt_bboxes,
                                         gt_bboxes_ignore, assigner_labels)
    sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes)

    n_valid = anchors.shape[0]
    bbox_targets = torch.zeros_like(anchors)
    bbox_weights = torch.zeros_like(anchors)
    labels = anchors.new_full((n_valid, ),
                              self.background_label,
                              dtype=torch.long)
    label_weights = anchors.new_zeros((n_valid, label_channels),
                                      dtype=torch.float)
    pos_gt_inds = anchors.new_full((n_valid, ), -1, dtype=torch.long)

    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds

    if len(pos_inds) > 0:
        if self.reg_decoded_bbox:
            pos_bbox_targets = sampling_result.pos_gt_bboxes
        else:
            pos_bbox_targets = self.bbox_coder.encode(
                sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)
        bbox_targets[pos_inds, :] = pos_bbox_targets
        bbox_weights[pos_inds, :] = 1.0
        # Index (0-based) of the gt assigned to each positive anchor.
        pos_gt_inds[pos_inds] = sampling_result.pos_assigned_gt_inds
        # Only rpn passes gt_labels as None; foreground is then 1.
        labels[pos_inds] = (
            1 if gt_labels is None else
            gt_labels[sampling_result.pos_assigned_gt_inds])
        pos_weight = self.train_cfg.pos_weight
        label_weights[pos_inds] = 1.0 if pos_weight <= 0 else pos_weight

    # `shadowed_labels` is an extra property of the assign result: rows of
    # (anchor_inds, class_label) for anchors lying in the outer region of a
    # gt or overlapped by another gt with a smaller area. Only these
    # entries are ignored for loss calculation.
    shadowed = assign_result.get_extra_property('shadowed_labels')
    if shadowed is not None and shadowed.numel():
        if len(shadowed.shape) != 2:
            label_weights[shadowed] = 0
        else:
            anchor_idx, cls_idx = shadowed[:, 0], shadowed[:, 1]
            assert (labels[anchor_idx] != cls_idx).all(), \
                'One label cannot be both positive and ignored'
            # When background_label is 0 all labels shift up by 1. This is
            # an in-place update of a view into `shadowed`.
            cls_idx += int(self.background_label == 0)
            label_weights[anchor_idx, cls_idx] = 0

    # Negatives are weighted after the shadowed entries were zeroed.
    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0

    # Map the results back onto the full anchor set.
    if unmap_outputs:
        total = flat_anchors.size(0)
        labels, label_weights, bbox_targets, bbox_weights = (
            unmap(t, total, inside_flags)
            for t in (labels, label_weights, bbox_targets, bbox_weights))
        pos_gt_inds = unmap(pos_gt_inds, total, inside_flags, fill=-1)

    return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
            neg_inds, pos_gt_inds)
def _get_target_single(self,
                       flat_anchors,
                       valid_flags,
                       num_level_anchors,
                       gt_bboxes,
                       gt_bboxes_ignore,
                       gt_labels,
                       img_meta,
                       label_channels=1,
                       unmap_outputs=True):
    """Compute regression, classification targets for anchors in a single
    image.

    Args:
        flat_anchors (Tensor): Multi-level anchors of the image, which are
            concatenated into a single tensor of shape (num_anchors, 4)
        valid_flags (Tensor): Multi level valid flags of the image,
            which are concatenated into a single tensor of
            shape (num_anchors,).
        num_level_anchors (Tensor): Number of anchors of each scale level.
        gt_bboxes (Tensor): Ground truth bboxes of the image,
            shape (num_gts, 4).
        gt_bboxes_ignore (Tensor): Ground truth bboxes to be
            ignored, shape (num_ignored_gts, 4).
        gt_labels (Tensor): Ground truth labels of each box,
            shape (num_gts,).
        img_meta (dict): Meta info of the image.
        label_channels (int): Channel of label.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.

    Returns:
        tuple: N is the number of total anchors in the image. Seven
        ``None`` values are returned when no anchor is flagged as inside.
            anchors (Tensor): All anchors in the image with shape (N, 4).
            labels (Tensor): Labels of all anchors in the image with shape
                (N,).
            label_weights (Tensor): Label weights of all anchor in the
                image with shape (N,).
            bbox_targets (Tensor): BBox targets of all anchors in the
                image with shape (N, 4).
            bbox_weights (Tensor): BBox weights of all anchors in the
                image with shape (N, 4)
            pos_inds (Tensor): Indices of positive anchor with shape
                (num_pos,).
            neg_inds (Tensor): Indices of negative anchor with shape
                (num_neg,).
    """
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       self.train_cfg.allowed_border)
    if not inside_flags.any():
        return (None, ) * 7

    # Restrict to inside anchors, then assign and sample on them.
    anchors = flat_anchors[inside_flags, :]
    inside_per_level = self.get_num_level_anchors_inside(
        num_level_anchors, inside_flags)
    assign_result = self.assigner.assign(anchors, inside_per_level,
                                         gt_bboxes, gt_bboxes_ignore,
                                         gt_labels)
    sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes)

    n_valid = anchors.shape[0]
    bbox_targets = torch.zeros_like(anchors)
    bbox_weights = torch.zeros_like(anchors)
    labels = anchors.new_full((n_valid, ),
                              self.num_classes,
                              dtype=torch.long)
    label_weights = anchors.new_zeros(n_valid, dtype=torch.float)

    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds

    if len(pos_inds) > 0:
        if hasattr(self, 'bbox_coder'):
            pos_bbox_targets = self.bbox_coder.encode(
                sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)
        else:
            # used in VFNetHead
            pos_bbox_targets = sampling_result.pos_gt_bboxes
        bbox_targets[pos_inds, :] = pos_bbox_targets
        bbox_weights[pos_inds, :] = 1.0
        # Only rpn gives gt_labels as None; foreground is the first class
        # since v2.5.0.
        labels[pos_inds] = (
            0 if gt_labels is None else
            gt_labels[sampling_result.pos_assigned_gt_inds])
        pos_weight = self.train_cfg.pos_weight
        label_weights[pos_inds] = 1.0 if pos_weight <= 0 else pos_weight

    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0

    # Map the results back onto the full anchor set.
    if unmap_outputs:
        total = flat_anchors.size(0)
        anchors = unmap(anchors, total, inside_flags)
        labels = unmap(labels, total, inside_flags, fill=self.num_classes)
        label_weights, bbox_targets, bbox_weights = (
            unmap(t, total, inside_flags)
            for t in (label_weights, bbox_targets, bbox_weights))

    return (anchors, labels, label_weights, bbox_targets, bbox_weights,
            pos_inds, neg_inds)
def _get_targets_single(self,
                        flat_anchors,
                        valid_flags,
                        gt_bboxes,
                        gt_bboxes_ignore,
                        gt_labels,
                        img_meta,
                        label_channels=1,
                        unmap_outputs=True):
    """Compute regression and classification targets for anchors in a
    single image.

    Args:
        flat_anchors (Tensor): Multi-level anchors of the image, which are
            concatenated into a single tensor of shape (num_anchors, 4)
        valid_flags (Tensor): Multi level valid flags of the image,
            which are concatenated into a single tensor of
            shape (num_anchors,).
        gt_bboxes (Tensor): Ground truth bboxes of the image,
            shape (num_gts, 4).
        gt_bboxes_ignore (Tensor): Ground truth bboxes to be
            ignored, shape (num_ignored_gts, 4).
        gt_labels (Tensor): Ground truth labels of each box,
            shape (num_gts,).
        img_meta (dict): Meta info of the image.
        label_channels (int): Channel of label.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.

    Returns:
        tuple: ``(labels, label_weights, bbox_targets, bbox_weights,
        pos_inds, neg_inds, sampling_result)`` for this image, or seven
        ``None`` values when no anchor is flagged as inside.
    """
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       self.train_cfg.allowed_border)
    if not inside_flags.any():
        return (None, ) * 7

    # Restrict to inside anchors, then assign and sample on them.
    anchors = flat_anchors[inside_flags, :]
    assigner_labels = None if self.sampling else gt_labels
    assign_result = self.assigner.assign(anchors, gt_bboxes,
                                         gt_bboxes_ignore, assigner_labels)
    sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes)

    n_valid = anchors.shape[0]
    bbox_targets = torch.zeros_like(anchors)
    bbox_weights = torch.zeros_like(anchors)
    labels = anchors.new_full((n_valid, ),
                              self.background_label,
                              dtype=torch.long)
    label_weights = anchors.new_zeros(n_valid, dtype=torch.float)

    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds

    if len(pos_inds) > 0:
        if self.reg_decoded_bbox:
            pos_bbox_targets = sampling_result.pos_gt_bboxes
        else:
            pos_bbox_targets = self.bbox_coder.encode(
                sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)
        bbox_targets[pos_inds, :] = pos_bbox_targets
        bbox_weights[pos_inds, :] = 1.0
        # Only rpn gives gt_labels as None; foreground is then 1.
        labels[pos_inds] = (
            1 if gt_labels is None else
            gt_labels[sampling_result.pos_assigned_gt_inds])
        pos_weight = self.train_cfg.pos_weight
        label_weights[pos_inds] = 1.0 if pos_weight <= 0 else pos_weight

    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0

    # Map the results back onto the full anchor set.
    if unmap_outputs:
        total = flat_anchors.size(0)
        # anchors outside the image are filled with the background label
        labels = unmap(labels, total, inside_flags,
                       fill=self.background_label)
        label_weights, bbox_targets, bbox_weights = (
            unmap(t, total, inside_flags)
            for t in (label_weights, bbox_targets, bbox_weights))

    return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
            neg_inds, sampling_result)
def _get_targets_single(self,
                        bbox_preds,
                        flat_anchors,
                        valid_flags,
                        gt_bboxes,
                        gt_bboxes_ignore,
                        gt_labels,
                        img_meta,
                        label_channels=1,
                        unmap_outputs=True):
    """Compute regression and classification targets for anchors in a
    single image.

    Args:
        bbox_preds (Tensor): Bbox prediction of the image, which
            shape is (h * w, 4)
        flat_anchors (Tensor): Anchors of the image, which shape is
            (h * w * num_anchors, 4)
        valid_flags (Tensor): Valid flags of the image, which shape is
            (h * w * num_anchors,).
        gt_bboxes (Tensor): Ground truth bboxes of the image,
            shape (num_gts, 4).
        gt_bboxes_ignore (Tensor): Ground truth bboxes to be
            ignored, shape (num_ignored_gts, 4).
        gt_labels (Tensor): Ground truth labels of each box,
            shape (num_gts,).
        img_meta (dict): Meta info of the image.
        label_channels (int): Channel of label.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.

    Returns:
        tuple: Eight ``None`` values are returned when no anchor is
        flagged as inside.
            labels (Tensor): Labels of image, which shape is
                (h * w * num_anchors, ).
            label_weights (Tensor): Label weights of image, which shape is
                (h * w * num_anchors, ).
            pos_inds (Tensor): Pos index of image.
            neg_inds (Tensor): Neg index of image.
            sampling_result (obj:`SamplingResult`): Sampling result.
            pos_bbox_weights (Tensor): The weight used to calculate the
                bbox branch loss, which shape is (num, ).
            pos_predicted_boxes (Tensor): Predicted boxes used to
                calculate the bbox branch loss, which shape is (num, 4).
            pos_target_boxes (Tensor): Target boxes used to calculate the
                bbox branch loss, which shape is (num, 4).
    """
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       self.train_cfg.allowed_border)
    if not inside_flags.any():
        return (None, ) * 8

    # Restrict anchors and predictions to the inside positions.
    anchors = flat_anchors[inside_flags, :]
    bbox_preds = bbox_preds.reshape(-1, 4)[inside_flags, :]

    # The assigner works on decoded predictions.
    decoded_preds = self.bbox_coder.decode(anchors, bbox_preds)
    assigner_labels = None if self.sampling else gt_labels
    assign_result = self.assigner.assign(decoded_preds, anchors, gt_bboxes,
                                         gt_bboxes_ignore, assigner_labels)

    # Extra outputs the assigner attached to its result.
    pos_bbox_weights = assign_result.get_extra_property('pos_idx')
    pos_predicted_boxes = assign_result.get_extra_property(
        'pos_predicted_boxes')
    pos_target_boxes = assign_result.get_extra_property('target_boxes')

    sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes)

    n_valid = anchors.shape[0]
    labels = anchors.new_full((n_valid, ),
                              self.num_classes,
                              dtype=torch.long)
    label_weights = anchors.new_zeros(n_valid, dtype=torch.float)

    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds

    if len(pos_inds) > 0:
        # Only rpn gives gt_labels as None; foreground is the first class
        # since v2.5.0.
        labels[pos_inds] = (
            0 if gt_labels is None else
            gt_labels[sampling_result.pos_assigned_gt_inds])
        pos_weight = self.train_cfg.pos_weight
        label_weights[pos_inds] = 1.0 if pos_weight <= 0 else pos_weight

    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0

    # Map the results back onto the full anchor set.
    if unmap_outputs:
        total = flat_anchors.size(0)
        # anchors outside the image are filled with the background label
        labels = unmap(labels, total, inside_flags, fill=self.num_classes)
        label_weights = unmap(label_weights, total, inside_flags)

    return (labels, label_weights, pos_inds, neg_inds, sampling_result,
            pos_bbox_weights, pos_predicted_boxes, pos_target_boxes)
def gfl_target_single(self,
                      flat_anchors,
                      valid_flags,
                      num_level_anchors,
                      gt_bboxes,
                      gt_bboxes_ignore,
                      gt_labels,
                      img_meta,
                      cfg,
                      label_channels=1,
                      unmap_outputs=True):
    """Compute classification and regression targets for one image.

    Anchors not flagged as inside the image are dropped, the remaining
    ones are assigned to ground truth boxes with the assigner built from
    ``cfg.assigner`` and passed through a ``PseudoSampler``. Regression
    targets are the assigned gt boxes themselves (no box encoding is
    applied here).

    Args:
        flat_anchors (Tensor): Anchors of the image, indexed along dim 0
            by the inside flags. Presumably shape (num_anchors, 4) --
            TODO confirm against caller.
        valid_flags (Tensor): Valid flags matching ``flat_anchors``.
        num_level_anchors: Number of anchors of each level, forwarded to
            ``self.get_num_level_anchors_inside``.
        gt_bboxes (Tensor): Ground truth bboxes of the image.
        gt_bboxes_ignore (Tensor): Ground truth bboxes to be ignored.
        gt_labels (Tensor | None): Labels of the gt boxes. When ``None``
            every positive anchor gets label 1.
        img_meta (dict): Meta info of the image; ``img_shape`` is read.
        cfg: Config; ``allowed_border``, ``assigner`` and ``pos_weight``
            are read.
        label_channels (int): Not used by this function.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.

    Returns:
        tuple: ``(anchors, labels, label_weights, bbox_targets,
        bbox_weights, pos_inds, neg_inds)``. When no anchor is inside the
        image, a tuple of seven ``None`` values is returned.
    """
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       cfg.allowed_border)
    if not inside_flags.any():
        # Must have the same arity as the regular return value below
        # (7 items); the original returned only 6, which breaks callers
        # that unpack the result.
        return (None, ) * 7

    # assign gt and sample anchors
    anchors = flat_anchors[inside_flags.type(torch.bool), :]

    num_level_anchors_inside = self.get_num_level_anchors_inside(
        num_level_anchors, inside_flags)
    bbox_assigner = build_assigner(cfg.assigner)
    assign_result = bbox_assigner.assign(anchors, num_level_anchors_inside,
                                         gt_bboxes, gt_bboxes_ignore,
                                         gt_labels)

    bbox_sampler = PseudoSampler()
    sampling_result = bbox_sampler.sample(assign_result, anchors, gt_bboxes)

    num_valid_anchors = anchors.shape[0]
    bbox_targets = torch.zeros_like(anchors)
    bbox_weights = torch.zeros_like(anchors)
    labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
    label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)

    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds
    if len(pos_inds) > 0:
        # targets are the raw gt boxes of the positive anchors
        pos_bbox_targets = sampling_result.pos_gt_bboxes
        bbox_targets[pos_inds, :] = pos_bbox_targets
        bbox_weights[pos_inds, :] = 1.0
        if gt_labels is None:
            labels[pos_inds] = 1
        else:
            labels[pos_inds] = gt_labels[
                sampling_result.pos_assigned_gt_inds]
        # a non-positive pos_weight means "use weight 1.0"
        if cfg.pos_weight <= 0:
            label_weights[pos_inds] = 1.0
        else:
            label_weights[pos_inds] = cfg.pos_weight
    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0

    # map up to original set of anchors
    if unmap_outputs:
        inside_flags = inside_flags.type(torch.bool)
        num_total_anchors = flat_anchors.size(0)
        anchors = unmap(anchors, num_total_anchors, inside_flags)
        labels = unmap(labels, num_total_anchors, inside_flags)
        label_weights = unmap(label_weights, num_total_anchors,
                              inside_flags)
        bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
        bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)

    return (anchors, labels, label_weights, bbox_targets, bbox_weights,
            pos_inds, neg_inds)
def _get_target_single(self,
                       cls_scores,
                       bbox_preds,
                       flat_anchors,
                       valid_flags,
                       gt_bboxes,
                       gt_bboxes_ignore,
                       gt_labels,
                       img_meta,
                       label_channels=1,
                       unmap_outputs=True):
    """Compute regression, classification targets for anchors in a single
    image.

    Args:
        cls_scores (Tensor): Box scores of the image; indexed along dim 0
            with the inside flags.
        bbox_preds (Tensor): Box energies / deltas of the image; indexed
            along dim 0 with the inside flags.
        flat_anchors (Tensor): Multi-level anchors of the image, which are
            concatenated into a single tensor of shape (num_anchors, 4)
        valid_flags (Tensor): Multi level valid flags of the image,
            which are concatenated into a single tensor of
            shape (num_anchors,).
        gt_bboxes (Tensor): Ground truth bboxes of the image,
            shape (num_gts, 4).
        gt_bboxes_ignore (Tensor): Ground truth bboxes to be
            ignored, shape (num_ignored_gts, 4).
        gt_labels (Tensor): Ground truth labels of each box,
            shape (num_gts,).
        img_meta (dict): Meta info of the image.
        label_channels (int): Channel of label.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.

    Returns:
        tuple: N is the number of total anchors in the image. When no
        anchor is inside the image, five ``None`` values are returned.
            anchors (Tensor): All anchors in the image with shape (N, 4).
            labels (Tensor): Labels of all anchors in the image with shape
                (N,).
            label_weights (Tensor): Label weights of all anchor in the
                image with shape (N,).
            bbox_targets (Tensor): BBox targets of all anchors in the
                image with shape (N, 4).
            norm_alignment_metrics (Tensor): Normalized alignment metrics
                of all priors in the image with shape (N,).
    """
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       self.train_cfg.allowed_border)
    if not inside_flags.any():
        # Same arity as the regular return value below (5 items); the
        # original returned 7, which breaks callers that unpack it.
        return (None, ) * 5

    # assign gt and sample anchors
    anchors = flat_anchors[inside_flags, :]
    assign_result = self.alignment_assigner.assign(
        cls_scores[inside_flags, :], bbox_preds[inside_flags, :], anchors,
        gt_bboxes, gt_bboxes_ignore, gt_labels, self.alpha, self.beta)
    assign_ious = assign_result.max_overlaps
    assign_metrics = assign_result.assign_metrics
    sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes)

    num_valid_anchors = anchors.shape[0]
    bbox_targets = torch.zeros_like(anchors)
    labels = anchors.new_full((num_valid_anchors, ),
                              self.num_classes,
                              dtype=torch.long)
    label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
    norm_alignment_metrics = anchors.new_zeros(num_valid_anchors,
                                               dtype=torch.float)

    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds
    if len(pos_inds) > 0:
        # point-based: targets are the raw gt boxes
        pos_bbox_targets = sampling_result.pos_gt_bboxes
        bbox_targets[pos_inds, :] = pos_bbox_targets

        if gt_labels is None:
            # Only rpn gives gt_labels as None
            # Foreground is the first class since v2.5.0
            labels[pos_inds] = 0
        else:
            labels[pos_inds] = gt_labels[
                sampling_result.pos_assigned_gt_inds]
        # a non-positive pos_weight means "use weight 1.0"
        if self.train_cfg.pos_weight <= 0:
            label_weights[pos_inds] = 1.0
        else:
            label_weights[pos_inds] = self.train_cfg.pos_weight
    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0

    # Normalize the alignment metric per gt: scale the metrics of the
    # anchors assigned to one gt so that their maximum equals the largest
    # IoU among those anchors.
    class_assigned_gt_inds = torch.unique(
        sampling_result.pos_assigned_gt_inds)
    for gt_inds in class_assigned_gt_inds:
        gt_class_inds = pos_inds[sampling_result.pos_assigned_gt_inds ==
                                 gt_inds]
        pos_alignment_metrics = assign_metrics[gt_class_inds]
        pos_ious = assign_ious[gt_class_inds]
        pos_norm_alignment_metrics = pos_alignment_metrics / (
            pos_alignment_metrics.max() + 10e-8) * pos_ious.max()
        norm_alignment_metrics[gt_class_inds] = pos_norm_alignment_metrics

    # map up to original set of anchors
    if unmap_outputs:
        num_total_anchors = flat_anchors.size(0)
        anchors = unmap(anchors, num_total_anchors, inside_flags)
        labels = unmap(labels,
                       num_total_anchors,
                       inside_flags,
                       fill=self.num_classes)
        label_weights = unmap(label_weights, num_total_anchors,
                              inside_flags)
        bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
        norm_alignment_metrics = unmap(norm_alignment_metrics,
                                       num_total_anchors, inside_flags)
    return (anchors, labels, label_weights, bbox_targets,
            norm_alignment_metrics)
def assign(self,
           mlvl_anchors,
           mlvl_valid_flags,
           gt_bboxes,
           img_meta,
           featmap_sizes,
           anchor_scale,
           anchor_strides,
           gt_bboxes_ignore=None,
           gt_labels=None,
           allowed_border=0):
    """Assign gt to anchors.

    This method assign a gt bbox to every bbox (proposal/anchor), each bbox
    will be assigned with -1, 0, or a positive number. -1 means don't care,
    0 means negative sample, positive number is the index (1-based) of
    assigned gt.

    The assignment is done in following steps, the order matters.

    1. Assign every anchor to 0 (negative)
    For each gt_bboxes:
        2. Compute ignore flags based on ignore_region then
            assign -1 to anchors w.r.t. ignore flags
        3. Compute pos flags based on center_region then
            assign gt_bboxes to anchors w.r.t. pos flags
        4. Compute ignore flags based on adjacent anchor lvl then
            assign -1 to anchors w.r.t. ignore flags
    5. Assign anchor outside of image to -1

    Args:
        mlvl_anchors (list[Tensor]): Multi level anchors.
        mlvl_valid_flags (list[Tensor]): Multi level valid flags.
        gt_bboxes (Tensor): Ground truth bboxes of image, shape (k, 4).
        img_meta (dict): Meta info of image.
        featmap_sizes (list[Tensor]): Feature map size of each level.
        anchor_scale (int): Scale of the anchor.
        anchor_strides (list[int]): Stride of the anchor.
        gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
            labelled as `ignored`, e.g., crowd boxes in COCO. Not
            supported: any value other than ``None`` raises.
        gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
        allowed_border (int, optional): The border to allow the valid
            anchor. Defaults to 0.

    Returns:
        :obj:`AssignResult`: The assign result.

    Raises:
        NotImplementedError: If ``gt_bboxes_ignore`` is not ``None``.
    """
    if gt_bboxes_ignore is not None:
        raise NotImplementedError

    num_gts = gt_bboxes.shape[0]
    num_bboxes = sum(x.shape[0] for x in mlvl_anchors)

    if num_gts == 0 or num_bboxes == 0:
        # No ground truth or boxes, return empty assignment
        max_overlaps = gt_bboxes.new_zeros((num_bboxes, ))
        assigned_gt_inds = gt_bboxes.new_zeros((num_bboxes, ),
                                               dtype=torch.long)
        if gt_labels is None:
            assigned_labels = None
        else:
            assigned_labels = gt_bboxes.new_full((num_bboxes, ),
                                                 -1,
                                                 dtype=torch.long)
        return AssignResult(num_gts,
                            assigned_gt_inds,
                            max_overlaps,
                            labels=assigned_labels)

    num_lvls = len(mlvl_anchors)
    r1 = (1 - self.center_ratio) / 2
    r2 = (1 - self.ignore_ratio) / 2

    # Pick the target level of each gt from its scale (sqrt of the box
    # area) relative to the anchor size of the first level.
    scale = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0]) *
                       (gt_bboxes[:, 3] - gt_bboxes[:, 1]))
    min_anchor_size = scale.new_full(
        (1, ), float(anchor_scale * anchor_strides[0]))
    target_lvls = torch.floor(
        torch.log2(scale) - torch.log2(min_anchor_size) + 0.5)
    target_lvls = target_lvls.clamp(min=0, max=num_lvls - 1).long()

    # 1. assign 0 (negative) by default
    mlvl_assigned_gt_inds = []
    mlvl_ignore_flags = []
    for lvl in range(num_lvls):
        h, w = featmap_sizes[lvl]
        assert h * w == mlvl_anchors[lvl].shape[0]
        assigned_gt_inds = gt_bboxes.new_full((h * w, ),
                                              0,
                                              dtype=torch.long)
        # Boolean so that it acts as a mask in step 4 (cont.). The
        # original kept a long 0/1 tensor and indexed with it, which is
        # integer indexing (it touched elements 0 and 1 only).
        ignore_flags = torch.zeros_like(assigned_gt_inds, dtype=torch.bool)
        mlvl_assigned_gt_inds.append(assigned_gt_inds)
        mlvl_ignore_flags.append(ignore_flags)

    for gt_id in range(num_gts):
        lvl = target_lvls[gt_id].item()
        featmap_size = featmap_sizes[lvl]
        stride = anchor_strides[lvl]
        anchors = mlvl_anchors[lvl]
        gt_bbox = gt_bboxes[gt_id, :4]

        # Compute regions
        ignore_region = calc_region(gt_bbox, r2, stride, featmap_size)
        ctr_region = calc_region(gt_bbox, r1, stride, featmap_size)

        # 2. Assign -1 to ignore flags
        ignore_flags = anchor_ctr_inside_region_flags(
            anchors, stride, ignore_region)
        mlvl_assigned_gt_inds[lvl][ignore_flags] = -1

        # 3. Assign gt_bboxes to pos flags
        pos_flags = anchor_ctr_inside_region_flags(anchors, stride,
                                                   ctr_region)
        mlvl_assigned_gt_inds[lvl][pos_flags] = gt_id + 1

        # 4. Assign -1 to ignore adjacent lvl (only recorded here; it is
        # applied after all gts were processed)
        if lvl > 0:
            d_lvl = lvl - 1
            d_anchors = mlvl_anchors[d_lvl]
            d_featmap_size = featmap_sizes[d_lvl]
            d_stride = anchor_strides[d_lvl]
            d_ignore_region = calc_region(gt_bbox, r2, d_stride,
                                          d_featmap_size)
            ignore_flags = anchor_ctr_inside_region_flags(
                d_anchors, d_stride, d_ignore_region)
            mlvl_ignore_flags[d_lvl][ignore_flags] = True
        if lvl < num_lvls - 1:
            u_lvl = lvl + 1
            u_anchors = mlvl_anchors[u_lvl]
            u_featmap_size = featmap_sizes[u_lvl]
            u_stride = anchor_strides[u_lvl]
            u_ignore_region = calc_region(gt_bbox, r2, u_stride,
                                          u_featmap_size)
            ignore_flags = anchor_ctr_inside_region_flags(
                u_anchors, u_stride, u_ignore_region)
            mlvl_ignore_flags[u_lvl][ignore_flags] = True

    # 4. (cont.) Assign -1 to ignore adjacent lvl
    for lvl in range(num_lvls):
        ignore_flags = mlvl_ignore_flags[lvl]
        mlvl_assigned_gt_inds[lvl][ignore_flags] = -1

    # 5. Assign -1 to anchor outside of image
    flat_assigned_gt_inds = torch.cat(mlvl_assigned_gt_inds)
    flat_anchors = torch.cat(mlvl_anchors)
    flat_valid_flags = torch.cat(mlvl_valid_flags)
    assert (flat_assigned_gt_inds.shape[0] == flat_anchors.shape[0] ==
            flat_valid_flags.shape[0])
    inside_flags = anchor_inside_flags(flat_anchors, flat_valid_flags,
                                       img_meta['img_shape'],
                                       allowed_border)
    outside_flags = ~inside_flags
    flat_assigned_gt_inds[outside_flags] = -1

    if gt_labels is not None:
        assigned_labels = torch.zeros_like(flat_assigned_gt_inds)
        # Positives must be looked up in the flattened assignment; the
        # original used `assigned_gt_inds`, i.e. the stale per-level
        # tensor left over from step 1.
        pos_flags = flat_assigned_gt_inds > 0
        assigned_labels[pos_flags] = gt_labels[
            flat_assigned_gt_inds[pos_flags] - 1]
    else:
        assigned_labels = None

    return AssignResult(num_gts,
                        flat_assigned_gt_inds,
                        None,
                        labels=assigned_labels)
def _get_target_single(self,
                       flat_anchors,
                       valid_flags,
                       num_level_anchors,
                       gt_bboxes,
                       gt_bboxes_ignore,
                       gt_labels,
                       img_meta,
                       label_channels=1,
                       unmap_outputs=True):
    """Compute classification and regression targets for a single image.

    Anchors rejected by ``anchor_inside_flags`` are dropped, the remaining
    ones are assigned to ground truths by ``self.assigner`` and sampled by
    ``self.sampler``; per-anchor labels, weights and encoded box targets
    are then built from the sampling result.

    Args:
        flat_anchors (Tensor): Anchors of the image, indexed along dim 0
            (presumably shape (num_anchors, 4) -- confirm with caller).
        valid_flags (Tensor): Per-anchor flags forwarded to
            ``anchor_inside_flags`` together with ``flat_anchors``.
        num_level_anchors: Per-level anchor counts, forwarded to
            ``self.get_num_level_anchors_inside``.
        gt_bboxes (Tensor): Ground-truth boxes given to the assigner and
            the sampler.
        gt_bboxes_ignore (Tensor | None): Boxes to ignore, forwarded to
            the assigner.
        gt_labels (Tensor | None): Ground-truth labels. When ``None``,
            positive anchors get label ``1``.
        img_meta (dict): Image meta info; ``img_meta['img_shape'][:2]`` is
            read.
        label_channels (int): Not used in this method body.
        unmap_outputs (bool): Whether to map the outputs back onto the
            full ``flat_anchors`` set.

    Returns:
        tuple: Always 7 elements. ``(None, ) * 7`` when no anchor lies
        inside the image; otherwise ``(anchors, labels, label_weights,
        bbox_targets, bbox_weights, pos_inds, neg_inds)``. ``pos_inds`` and
        ``neg_inds`` index the *inside* anchors and are not unmapped.
    """
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       self.train_cfg.allowed_border)
    if not inside_flags.any():
        # Must match the arity of the normal return below (7 values);
        # the previous 6-tuple broke callers that unpack/zip the results.
        return (None, ) * 7

    # assign gt and sample anchors
    anchors = flat_anchors[inside_flags, :]

    num_level_anchors_inside = self.get_num_level_anchors_inside(
        num_level_anchors, inside_flags)
    assign_result = self.assigner.assign(anchors, num_level_anchors_inside,
                                         gt_bboxes, gt_bboxes_ignore,
                                         gt_labels)

    sampling_result = self.sampler.sample(assign_result, anchors,
                                          gt_bboxes)

    num_valid_anchors = anchors.shape[0]
    bbox_targets = torch.zeros_like(anchors)
    bbox_weights = torch.zeros_like(anchors)
    # Every anchor starts as background with zero classification weight.
    labels = anchors.new_full((num_valid_anchors, ),
                              self.background_label,
                              dtype=torch.long)
    label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)

    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds
    if len(pos_inds) > 0:
        pos_bbox_targets = self.bbox_coder.encode(
            sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)
        bbox_targets[pos_inds, :] = pos_bbox_targets
        bbox_weights[pos_inds, :] = 1.0
        if gt_labels is None:
            labels[pos_inds] = 1
        else:
            labels[pos_inds] = gt_labels[
                sampling_result.pos_assigned_gt_inds]
        # A non-positive pos_weight means "use the default weight 1.0".
        if self.train_cfg.pos_weight <= 0:
            label_weights[pos_inds] = 1.0
        else:
            label_weights[pos_inds] = self.train_cfg.pos_weight
    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0

    # map up to original set of anchors
    if unmap_outputs:
        num_total_anchors = flat_anchors.size(0)
        anchors = unmap(anchors, num_total_anchors, inside_flags)
        # NOTE(review): the initial fill above uses self.background_label
        # while this fill uses self.num_classes -- confirm they agree.
        labels = unmap(labels,
                       num_total_anchors,
                       inside_flags,
                       fill=self.num_classes)
        label_weights = unmap(label_weights, num_total_anchors,
                              inside_flags)
        bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
        bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)

    return (anchors, labels, label_weights, bbox_targets, bbox_weights,
            pos_inds, neg_inds)
def _get_targets_single(self,
                        flat_anchors,
                        valid_flags,
                        gt_bboxes,
                        gt_bboxes_ignore,
                        gt_masks,
                        gt_labels,
                        img_meta,
                        label_channels=1,
                        unmap_outputs=True):
    """Compute regression and classification targets for anchors in a
    single image.

    Anchors are assigned using the horizontal ``gt_bboxes``; the
    regression targets are computed from rotated boxes derived from
    ``gt_masks`` via ``cv2_mask2rbbox_mod``.

    Args:
        flat_anchors (Tensor): Multi-level anchors of the image, which are
            concatenated into a single tensor of shape (num_anchors ,4)
        valid_flags (Tensor): Multi level valid flags of the image,
            which are concatenated into a single tensor of
                shape (num_anchors,).
        gt_bboxes (Tensor): Ground truth bboxes of the image,
            shape (num_gts, 4).
        gt_bboxes_ignore (Tensor): Ground truth bboxes to be
            ignored, shape (num_ignored_gts, 4).
        gt_masks: Ground truth masks, passed to ``cv2_mask2rbbox_mod``.
            The last column of its result is taken as the per-gt incline
            and the remaining columns as the rotated gt box.
        gt_labels (Tensor): Ground truth labels of each box,
            shape (num_gts,).
        img_meta (dict): Meta info of the image.
        label_channels (int): Channel of label. Not used in this method
            body.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.

    Returns:
        tuple: ``(None, ) * 7`` when no anchor passes
        ``anchor_inside_flags``; otherwise
            labels (Tensor): Per-anchor labels, ``self.num_classes``
                unless the anchor is positive.
            label_weights (Tensor): Per-anchor classification weights.
            bbox_targets (Tensor): Per-anchor regression targets.
            bbox_weights (Tensor): Per-anchor regression weights.
            pos_inds (Tensor): ``sampling_result.pos_inds`` (not unmapped).
            neg_inds (Tensor): ``sampling_result.neg_inds`` (not unmapped).
            sampling_result: The object returned by ``self.sampler.sample``.

    Raises:
        AssertionError: If ``bbox_targets`` contains an infinite value.
    """
    ## Use the horizontal gt_bboxes to assign the anchors.
    ## Use gt_masks to generate gt_rbboxes, from which the targets are
    ## computed.
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       self.train_cfg.allowed_border)
    if not inside_flags.any():
        return (None, ) * 7
    # assign gt and sample anchors
    anchors = flat_anchors[inside_flags, :]

    # gt_labels are withheld from the assigner when self.sampling is set.
    assign_result = self.assigner.assign(
        anchors, gt_bboxes, gt_bboxes_ignore,
        None if self.sampling else gt_labels)
    # EXAMPLE
    # 28 pos , iou > 0.5            -> assign_result.gt_inds = 1
    # 79 bad pos , 0.4< iou < 0.5   -> assign_result.gt_inds = -1
    # others, iou < 0.4             -> assign_result.gt_inds = 0

    ########################################################
    # gt_masks = [gtm for gtm in gt_masks.masks]
    # if use_mod:
    #     gt_rbboxes = cv2_mask2rbbox_mod(gt_masks)
    # else:
    gt_rbboxes = cv2_mask2rbbox_mod(gt_masks)
    # Move the rotated boxes onto gt_bboxes' dtype/device.
    gt_rbboxes = gt_bboxes.new_tensor(gt_rbboxes)
    # Last column is the incline; everything before it is the box.
    gt_inclines = gt_rbboxes[:, -1]
    gt_rbboxes = gt_rbboxes[:, 0:-1]
    # The sampler receives the rotated boxes, so pos_gt_bboxes below are
    # rotated boxes rather than the horizontal gt_bboxes.
    sampling_result = self.sampler.sample(assign_result, anchors,
                                          gt_rbboxes)
    ########################################################

    num_valid_anchors = anchors.shape[0]
    bbox_targets = anchors.new_zeros((anchors.shape[0], 5))
    bbox_weights = anchors.new_zeros((anchors.shape[0], 5))
    labels = anchors.new_full((num_valid_anchors, ),
                              self.num_classes,
                              dtype=torch.long)
    label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)

    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds
    # These inds index into `anchors`, not into sampling_result.bboxes.
    if len(pos_inds) > 0:
        if not self.reg_decoded_bbox:
            pos_bbox_targets = self.bbox_coder.encode(
                sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)
        else:
            pos_bbox_targets = sampling_result.pos_gt_bboxes
        #############################################################
        bbox_targets[pos_inds, 0:5] = pos_bbox_targets
        # gt_inds are 1-based for positives; shift to 0-based gt indices.
        pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
        # NOTE(review): bbox_targets is allocated above with 5 columns
        # (valid column indices 0-4), so writing column index 5 looks out
        # of range -- confirm the intended target width (6 columns?) or
        # whether this incline assignment is stale.
        bbox_targets[pos_inds, 5] = gt_inclines[pos_assigned_gt_inds]
        bbox_weights[pos_inds, :] = 1.0
        #############################################################
        if gt_labels is None:
            # Only rpn gives gt_labels as None
            # Foreground is the first class since v2.5.0
            labels[pos_inds] = 0
        else:
            labels[pos_inds] = gt_labels[
                sampling_result.pos_assigned_gt_inds]
        # A non-positive pos_weight falls back to weight 1.0.
        if self.train_cfg.pos_weight <= 0:
            label_weights[pos_inds] = 1.0
        else:
            label_weights[pos_inds] = self.train_cfg.pos_weight
    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0

    # map up to original set of anchors
    if unmap_outputs:
        num_total_anchors = flat_anchors.size(0)
        labels = unmap(
            labels, num_total_anchors, inside_flags,
            fill=self.num_classes)  # fill bg label
        label_weights = unmap(label_weights, num_total_anchors,
                              inside_flags)
        bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
        bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)

    # if torch.sum(torch.isinf(bbox_targets).flatten()) > 0:
    #     print(bbox_targets)
    # Fail loudly if any regression target is infinite.
    # NOTE(review): original nesting of this check (inside or after the
    # unmap branch) could not be recovered from the flattened source.
    if torch.sum(torch.isinf(bbox_targets)):
        raise AssertionError

    return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
            neg_inds, sampling_result)
def atss_target_single(self,
                       flat_anchors,
                       valid_flags,
                       num_level_anchors,
                       gt_bboxes,
                       gt_masks,
                       gt_bboxes_ignore,
                       gt_labels,
                       img_meta,
                       cfg,
                       target_means,
                       target_stds,
                       label_channels=1,
                       unmap_outputs=True,
                       with_module=False,
                       hbb_trans='hbb2obb_v2'):
    """Compute horizontal and rotated box targets for a single image.

    Anchors rejected by ``anchor_inside_flags`` are dropped; the rest are
    assigned with the assigner built from ``cfg.assigner`` and wrapped by
    a ``PseudoSampler``. Horizontal targets come from ``bbox2delta``;
    rotated targets come from ``dbbox2delta`` / ``dbbox2delta_v3`` applied
    to the positive anchors converted with ``hbb2obb_v2`` and to the
    oriented boxes derived from ``gt_masks``.

    Args:
        flat_anchors (Tensor): Anchors of the image, indexed along dim 0.
        valid_flags (Tensor): Per-anchor flags forwarded to
            ``anchor_inside_flags``.
        num_level_anchors: Per-level anchor counts, forwarded to
            ``self.get_num_level_anchors_inside``.
        gt_bboxes (Tensor): Horizontal ground-truth boxes used for
            assignment and for the horizontal targets.
        gt_masks: Ground-truth masks, converted to oriented boxes with
            ``gt_mask_bp_obbs``.
        gt_bboxes_ignore (Tensor | None): Forwarded to the assigner.
        gt_labels (Tensor | None): Ground-truth labels. When ``None``,
            positive anchors get label ``1``.
        img_meta (dict): Image meta info; ``img_meta['img_shape'][:2]`` is
            read.
        cfg: Config providing ``allowed_border``, ``assigner`` and
            ``pos_weight``.
        target_means: Means used for the rotated-box deltas.
        target_stds: Stds used for the rotated-box deltas.
        label_channels (int): Not used in this method body.
        unmap_outputs (bool): Whether to map the outputs back onto the
            full ``flat_anchors`` set.
        with_module (bool): Forwarded to ``gt_mask_bp_obbs``; selects
            ``dbbox2delta`` (True) or ``dbbox2delta_v3`` (False).
        hbb_trans (str): Not used in this method body; ``hbb2obb_v2`` is
            always applied.

    Returns:
        tuple: Always 9 elements. ``(None, ) * 9`` when no anchor lies
        inside the image; otherwise ``(anchors, labels, label_weights,
        bbox_targets, bbox_weights, pos_inds, neg_inds, rbbox_targets,
        rbbox_weights)``. ``pos_inds`` and ``neg_inds`` index the *inside*
        anchors and are not unmapped.
    """
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       cfg.allowed_border)
    if not inside_flags.any():
        # Must match the arity of the normal return below (9 values);
        # the previous 6-tuple broke callers that unpack/zip the results.
        return (None, ) * 9

    # assign gt and sample anchors
    anchors = flat_anchors[inside_flags.type(torch.bool), :]

    num_level_anchors_inside = self.get_num_level_anchors_inside(
        num_level_anchors, inside_flags)
    bbox_assigner = build_assigner(cfg.assigner)
    assign_result = bbox_assigner.assign(anchors, num_level_anchors_inside,
                                         gt_bboxes, gt_bboxes_ignore,
                                         gt_labels)

    bbox_sampler = PseudoSampler()
    sampling_result = bbox_sampler.sample(assign_result, anchors,
                                          gt_bboxes)

    num_valid_anchors = anchors.shape[0]
    bbox_targets = torch.zeros_like(anchors)
    bbox_weights = torch.zeros_like(anchors)
    # Custom addition: rotated-box targets/weights with 5 columns.
    rbbox_targets = torch.zeros(num_valid_anchors, 5).to(anchors.device)
    rbbox_weights = torch.zeros(num_valid_anchors, 5).to(anchors.device)

    labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
    label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)

    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds

    # Custom addition: oriented gt boxes gathered per positive anchor.
    pos_assigned_gt_inds = sampling_result.pos_assigned_gt_inds
    gt_obbs = gt_mask_bp_obbs(gt_masks, with_module)
    gt_obbs_ts = torch.from_numpy(gt_obbs).to(
        sampling_result.pos_bboxes.device)
    pos_gt_obbs_ts = gt_obbs_ts[pos_assigned_gt_inds]

    if len(pos_inds) > 0:
        # Custom addition: rotated regression targets.
        pos_ext_rbboxes = hbb2obb_v2(sampling_result.pos_bboxes)
        if with_module:
            pos_rbbox_targets = dbbox2delta(pos_ext_rbboxes,
                                            pos_gt_obbs_ts, target_means,
                                            target_stds)
        else:
            pos_rbbox_targets = dbbox2delta_v3(pos_ext_rbboxes,
                                               pos_gt_obbs_ts,
                                               target_means, target_stds)

        # NOTE(review): the horizontal deltas use self.target_means /
        # self.target_stds, not the target_means / target_stds arguments.
        pos_bbox_targets = bbox2delta(sampling_result.pos_bboxes,
                                      sampling_result.pos_gt_bboxes,
                                      self.target_means[:4],
                                      self.target_stds[:4])

        bbox_targets[pos_inds, :] = pos_bbox_targets
        rbbox_targets[pos_inds, :] = pos_rbbox_targets
        rbbox_weights[pos_inds, :] = 1.0
        bbox_weights[pos_inds, :] = 1.0
        if gt_labels is None:
            labels[pos_inds] = 1
        else:
            labels[pos_inds] = gt_labels[
                sampling_result.pos_assigned_gt_inds]
        # A non-positive pos_weight means "use the default weight 1.0".
        if cfg.pos_weight <= 0:
            label_weights[pos_inds] = 1.0
        else:
            label_weights[pos_inds] = cfg.pos_weight
    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0

    # map up to original set of anchors
    if unmap_outputs:
        inside_flags = inside_flags.type(torch.bool)
        num_total_anchors = flat_anchors.size(0)
        anchors = unmap(anchors, num_total_anchors, inside_flags)
        labels = unmap(labels, num_total_anchors, inside_flags)
        label_weights = unmap(label_weights, num_total_anchors,
                              inside_flags)
        bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
        bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
        # Custom addition: unmap the rotated targets as well.
        rbbox_targets = unmap(rbbox_targets, num_total_anchors,
                              inside_flags)
        rbbox_weights = unmap(rbbox_weights, num_total_anchors,
                              inside_flags)

    return (anchors, labels, label_weights, bbox_targets, bbox_weights,
            pos_inds, neg_inds, rbbox_targets, rbbox_weights)
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    """Set up training, then count positive RPN anchors per feature level.

    The data loaders, (distributed) model wrapper, runner and hooks are
    built as for a normal training run, but the ``runner.run`` call is
    commented out. Instead, every batch of every data loader is visited,
    anchors are generated for 5 feature levels (strides 2**2 .. 2**6),
    the RPN assigner is run on the first image of the batch, and the
    number of anchors with ``gt_inds > 0`` is accumulated per level in
    ``total_num_targets``, which is printed.

    Args:
        model: Detector to wrap with ``MMDataParallel`` /
            ``MMDistributedDataParallel``.
        dataset: A dataset or a list/tuple of datasets.
        cfg: Config object; reads among others ``data``, ``gpu_ids``,
            ``seed``, ``optimizer``, ``runner``/``total_epochs``,
            ``model.rpn_head.anchor_generator`` and
            ``model.train_cfg.rpn.assigner``.
        distributed (bool): Whether to use the distributed wrappers/hooks.
        validate (bool): Whether to register an evaluation hook.
        timestamp: Stored on ``runner.timestamp``.
        meta: Passed to ``build_runner`` as ``meta``.

    Returns:
        None normally. NOTE(review): ``(None, ) * 7`` is returned if the
        first image of some batch has no inside anchor -- this looks like
        a leftover from a target function; confirm ``continue`` was meant.
    """
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={cfg.data.imgs_per_gpu} is used in this experiments')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiments')
        # The deprecated key wins in both cases.
        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    if 'runner' not in cfg:
        # Fall back to an epoch-based runner driven by cfg.total_epochs.
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.total_epochs
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    else:
        if 'total_epochs' in cfg:
            assert cfg.total_epochs == cfg.runner.max_epochs

    runner = build_runner(
        cfg.runner,
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir=cfg.work_dir,
            logger=logger,
            meta=meta))

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        if isinstance(runner, EpochBasedRunner):
            runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        # Support batch_size > 1 in validation
        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
        if val_samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.val.pipeline = replace_ImageToTensor(
                cfg.data.val.pipeline)
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            # Copy so that popping 'priority' does not mutate the config.
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    # Training itself is disabled; the code below only gathers statistics.
    # runner.run(data_loaders, cfg.workflow)

    anchor_generator = build_anchor_generator(
        cfg.model.rpn_head.anchor_generator)
    assigner = build_assigner(cfg.model.train_cfg.rpn.assigner)
    # One counter per feature level (5 levels, see range(2, 7) below).
    total_num_targets = torch.tensor([0] * 5)
    for iteration, data in enumerate(data_loaders):
        # NOTE(review): `i` is the batch here but is rebound by the two
        # inner `for i in range(...)` loops below; this only works because
        # the batch is fully read before the first rebinding.
        for i in data:
            # print(i.keys())
            img_metas = i['img_metas']._data
            # print(img_metas)
            num_imgs = len(img_metas)
            images = i['img']._data
            gt_bboxes = i['gt_bboxes']._data
            h, w = images[0].size()[-2:]
            # Feature-map sizes for strides 4, 8, 16, 32, 64 (truncated).
            features_shape = []
            for i in range(2, 7):
                f_shape = [int(h/(2**i)), int(w/(2**i))]
                features_shape.append(f_shape)
            multi_level_anchors = anchor_generator.grid_anchors(
                features_shape)
            # Every image shares the same anchor tensors (no copy made).
            anchor_list = [multi_level_anchors for _ in range(num_imgs)]

            # for each image, we compute valid flags of multi level anchors
            valid_flag_list = []
            for img_id, img_meta in enumerate(img_metas):
                multi_level_flags = anchor_generator.valid_flags(
                    features_shape, img_meta[0]['pad_shape'])
                valid_flag_list.append(multi_level_flags)
            # print(anchor_list, valid_flag_list)
            assert len(anchor_list) == len(valid_flag_list) == num_imgs

            # anchor number of multi levels
            num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
            # concat all level anchors to a single tensor
            concat_anchor_list = []
            concat_valid_flag_list = []
            for i in range(num_imgs):
                assert len(anchor_list[i]) == len(valid_flag_list[i])
                concat_anchor_list.append(torch.cat(anchor_list[i]))
                concat_valid_flag_list.append(torch.cat(valid_flag_list[i]))
            gt_bboxes_ignore_list= None
            # compute targets for each image
            if gt_bboxes_ignore_list is None:
                gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
            # Only index 0 of the batch is analysed from here on.
            inside_flags = anchor_inside_flags(concat_anchor_list[0],
                                               concat_valid_flag_list[0],
                                               img_metas[0][0]['img_shape'][:2],
                                               0)
            if not inside_flags.any():
                # NOTE(review): this aborts the whole statistics pass and
                # makes train_detector return a 7-tuple -- confirm intent.
                return (None, ) * 7
            # assign gt and sample anchors
            anchors = concat_anchor_list[0][inside_flags, :]
            assign_result = assigner.assign(
                anchors.cpu(), gt_bboxes[0][0], gt_bboxes_ignore_list[0], None)
            # NOTE(review): `pos_gt_bboxes` is normally an attribute of a
            # sampling result, not of an assign result -- verify that the
            # configured assigner's result provides it.
            print(assign_result.pos_gt_bboxes)
            pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
            # -1 everywhere, 1 on anchors assigned to a ground truth.
            labels = anchors.new_full((anchors.shape[0], ), -1, dtype=torch.long)
            labels[pos_inds] = 1
            num_total_anchors = concat_anchor_list[0].size(0)
            labels = unmap(
                labels, num_total_anchors, inside_flags,
                fill=-1)  # fill bg label
            # Split the flat labels back into per-level chunks.
            match_results = images_to_levels([labels], num_level_anchors)
            # print(match_results)
            for idx, match_result in enumerate(match_results):
                num = torch.where(match_result==1)[0].numel()
                total_num_targets[idx] += num
            # print(total_num_targets)
            # NOTE(review): the nesting of the two prints below could not
            # be recovered from the flattened source; here the first runs
            # once per batch and the second once at the end.
            print(total_num_targets)
    print(total_num_targets)