def _point_target_single(self, flat_proposals, valid_flags, gt_bboxes, gt_bboxes_ignore, gt_labels, label_channels=1, stage='init', unmap_outputs=True): inside_flags = valid_flags if not inside_flags.any(): return (None, ) * 7 # assign gt and sample proposals proposals = flat_proposals[inside_flags, :] if stage == 'init': assigner = self.init_assigner pos_weight = self.train_cfg.init.pos_weight else: assigner = self.refine_assigner pos_weight = self.train_cfg.refine.pos_weight assign_result = assigner.assign(proposals, gt_bboxes, gt_bboxes_ignore, None if self.sampling else gt_labels) sampling_result = self.sampler.sample(assign_result, proposals, gt_bboxes) num_valid_proposals = proposals.shape[0] bbox_gt = proposals.new_zeros([num_valid_proposals, 4]) pos_proposals = torch.zeros_like(proposals) proposals_weights = proposals.new_zeros([num_valid_proposals, 4]) labels = proposals.new_full((num_valid_proposals, ), self.background_label, dtype=torch.long) label_weights = proposals.new_zeros(num_valid_proposals, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: pos_gt_bboxes = sampling_result.pos_gt_bboxes bbox_gt[pos_inds, :] = pos_gt_bboxes pos_proposals[pos_inds, :] = proposals[pos_inds, :] proposals_weights[pos_inds, :] = 1.0 if gt_labels is None: labels[pos_inds] = 1 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of proposals if unmap_outputs: num_total_proposals = flat_proposals.size(0) labels = unmap(labels, num_total_proposals, inside_flags) label_weights = unmap(label_weights, num_total_proposals, inside_flags) bbox_gt = unmap(bbox_gt, num_total_proposals, inside_flags) pos_proposals = unmap(pos_proposals, num_total_proposals, inside_flags) proposals_weights = unmap(proposals_weights, num_total_proposals, inside_flags) return (labels, label_weights, bbox_gt, pos_proposals, proposals_weights, pos_inds, neg_inds)
def _get_targets_single(self, flat_anchors, valid_flags, gt_bboxes, gt_bboxes_ignore, gt_labels, img_meta, label_channels=1, unmap_outputs=True): """Compute regression and classification targets for anchors in a single image. Most of the codes are the same with the base class :obj: `AnchorHead`, except that it also collects and returns the matched gt index in the image (from 0 to num_gt-1). If the anchor bbox is not matched to any gt, the corresponding value in pos_gt_inds is -1. """ inside_flags = anchor_inside_flags(flat_anchors, valid_flags, img_meta['img_shape'][:2], self.train_cfg.allowed_border) if not inside_flags.any(): return (None, ) * 7 # Assign gt and sample anchors anchors = flat_anchors[inside_flags.type(torch.bool), :] assign_result = self.assigner.assign( anchors, gt_bboxes, gt_bboxes_ignore, None if self.sampling else gt_labels) sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes) num_valid_anchors = anchors.shape[0] bbox_targets = torch.zeros_like(anchors) bbox_weights = torch.zeros_like(anchors) labels = anchors.new_full((num_valid_anchors, ), self.background_label, dtype=torch.long) label_weights = anchors.new_zeros((num_valid_anchors, label_channels), dtype=torch.float) pos_gt_inds = anchors.new_full((num_valid_anchors, ), -1, dtype=torch.long) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: if not self.reg_decoded_bbox: pos_bbox_targets = self.bbox_coder.encode( sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes) else: pos_bbox_targets = sampling_result.pos_gt_bboxes bbox_targets[pos_inds, :] = pos_bbox_targets bbox_weights[pos_inds, :] = 1.0 # The assigned gt_index for each anchor. (0-based) pos_gt_inds[pos_inds] = sampling_result.pos_assigned_gt_inds if gt_labels is None: # only rpn gives gt_labels as None, this time FG is 1 labels[pos_inds] = 1 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if self.train_cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight # shadowed_labels is a tensor composed of tuples # (anchor_inds, class_label) that indicate those anchors lying in the # outer region of a gt or overlapped by another gt with a smaller # area. # # Therefore, only the shadowed labels are ignored for loss calculation. # the key `shadowed_labels` is defined in :obj:`CenterRegionAssigner` shadowed_labels = assign_result.get_extra_property('shadowed_labels') if shadowed_labels is not None and shadowed_labels.numel(): if len(shadowed_labels.shape) == 2: idx_, label_ = shadowed_labels[:, 0], shadowed_labels[:, 1] assert (labels[idx_] != label_).all(), \ 'One label cannot be both positive and ignored' # If background_label is 0. Then all labels increase by 1 label_ += int(self.background_label == 0) label_weights[idx_, label_] = 0 else: label_weights[shadowed_labels] = 0 if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of anchors if unmap_outputs: num_total_anchors = flat_anchors.size(0) labels = unmap(labels, num_total_anchors, inside_flags) label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) pos_gt_inds = unmap(pos_gt_inds, num_total_anchors, inside_flags, fill=-1) return (labels, label_weights, bbox_targets, bbox_weights, pos_inds, neg_inds, pos_gt_inds)
def _get_targets_single(self, flat_anchors, valid_flags, gt_bboxes, gt_bboxes_ignore, gt_labels, img_meta, label_channels=1, unmap_outputs=True): """Compute regression and classification targets for anchors in a single image. Args: flat_anchors (Tensor): Multi-level anchors of the image, which are concatenated into a single tensor of shape (num_anchors ,4) valid_flags (Tensor): Multi level valid flags of the image, which are concatenated into a single tensor of shape (num_anchors,). gt_bboxes (Tensor): Ground truth bboxes of the image, shape (num_gts, 4). img_meta (dict): Meta info of the image. gt_bboxes_ignore (Tensor): Ground truth bboxes to be ignored, shape (num_ignored_gts, 4). img_meta (dict): Meta info of the image. gt_labels (Tensor): Ground truth labels of each box, shape (num_gts,). label_channels (int): Channel of label. unmap_outputs (bool): Whether to map outputs back to the original set of anchors. Returns: tuple: labels_list (list[Tensor]): Labels of each level label_weights_list (list[Tensor]): Label weights of each level bbox_targets_list (list[Tensor]): BBox targets of each level bbox_weights_list (list[Tensor]): BBox weights of each level num_total_pos (int): Number of positive samples in all images num_total_neg (int): Number of negative samples in all images """ inside_flags = anchor_inside_flags(flat_anchors, valid_flags, img_meta['img_shape'][:2], self.train_cfg.allowed_border) if not inside_flags.any(): return (None, ) * 7 # assign gt and sample anchors anchors = flat_anchors[inside_flags, :] assign_result = self.assigner.assign( anchors, gt_bboxes, gt_bboxes_ignore, None if self.sampling else gt_labels) sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes) num_valid_anchors = anchors.shape[0] bbox_targets = torch.zeros_like(anchors) bbox_weights = torch.zeros_like(anchors) labels = anchors.new_full((num_valid_anchors, ), self.background_label, dtype=torch.long) label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: if not self.reg_decoded_bbox: pos_bbox_targets = self.bbox_coder.encode( sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes) else: pos_bbox_targets = sampling_result.pos_gt_bboxes bbox_targets[pos_inds, :] = pos_bbox_targets bbox_weights[pos_inds, :] = 1.0 if gt_labels is None: # only rpn gives gt_labels as None, this time FG is 1 labels[pos_inds] = 1 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if self.train_cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of anchors if unmap_outputs: num_total_anchors = flat_anchors.size(0) labels = unmap(labels, num_total_anchors, inside_flags, fill=self.background_label) # fill bg label label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) return (labels, label_weights, bbox_targets, bbox_weights, pos_inds, neg_inds, sampling_result)
def _get_target_single(self, flat_anchors, valid_flags, num_level_anchors, gt_bboxes, gt_bboxes_ignore, gt_labels, img_meta, label_channels=1, unmap_outputs=True): """Compute regression, classification targets for anchors in a single image. Args: flat_anchors (Tensor): Multi-level anchors of the image, which are concatenated into a single tensor of shape (num_anchors ,4) valid_flags (Tensor): Multi level valid flags of the image, which are concatenated into a single tensor of shape (num_anchors,). num_level_anchors Tensor): Number of anchors of each scale level. gt_bboxes (Tensor): Ground truth bboxes of the image, shape (num_gts, 4). gt_bboxes_ignore (Tensor): Ground truth bboxes to be ignored, shape (num_ignored_gts, 4). gt_labels (Tensor): Ground truth labels of each box, shape (num_gts,). img_meta (dict): Meta info of the image. label_channels (int): Channel of label. unmap_outputs (bool): Whether to map outputs back to the original set of anchors. Returns: tuple: N is the number of total anchors in the image. labels (Tensor): Labels of all anchors in the image with shape (N,). label_weights (Tensor): Label weights of all anchor in the image with shape (N,). bbox_targets (Tensor): BBox targets of all anchors in the image with shape (N, 4). bbox_weights (Tensor): BBox weights of all anchors in the image with shape (N, 4) pos_inds (Tensor): Indices of postive anchor with shape (num_pos,). neg_inds (Tensor): Indices of negative anchor with shape (num_neg,). """ inside_flags = anchor_inside_flags(flat_anchors, valid_flags, img_meta['img_shape'][:2], self.train_cfg.allowed_border) if not inside_flags.any(): return (None, ) * 7 # assign gt and sample anchors anchors = flat_anchors[inside_flags, :] num_level_anchors_inside = self.get_num_level_anchors_inside( num_level_anchors, inside_flags) assign_result = self.assigner.assign(anchors, num_level_anchors_inside, gt_bboxes, gt_bboxes_ignore, gt_labels) sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes) num_valid_anchors = anchors.shape[0] bbox_targets = torch.zeros_like(anchors) bbox_weights = torch.zeros_like(anchors) labels = anchors.new_full((num_valid_anchors, ), self.num_classes, dtype=torch.long) label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: if hasattr(self, 'bbox_coder'): pos_bbox_targets = self.bbox_coder.encode( sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes) else: # used in VFNetHead pos_bbox_targets = sampling_result.pos_gt_bboxes bbox_targets[pos_inds, :] = pos_bbox_targets bbox_weights[pos_inds, :] = 1.0 if gt_labels is None: # Only rpn gives gt_labels as None # Foreground is the first class since v2.5.0 labels[pos_inds] = 0 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if self.train_cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of anchors if unmap_outputs: num_total_anchors = flat_anchors.size(0) anchors = unmap(anchors, num_total_anchors, inside_flags) labels = unmap(labels, num_total_anchors, inside_flags, fill=self.num_classes) label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) return (anchors, labels, label_weights, bbox_targets, bbox_weights, pos_inds, neg_inds)
def _get_targets_single(self, bbox_preds, flat_anchors, valid_flags, gt_bboxes, gt_bboxes_ignore, gt_labels, img_meta, label_channels=1, unmap_outputs=True): """Compute regression and classification targets for anchors in a single image. Args: bbox_preds (Tensor): Bbox prediction of the image, which shape is (h * w ,4) flat_anchors (Tensor): Anchors of the image, which shape is (h * w * num_anchors ,4) valid_flags (Tensor): Valid flags of the image, which shape is (h * w * num_anchors,). gt_bboxes (Tensor): Ground truth bboxes of the image, shape (num_gts, 4). gt_bboxes_ignore (Tensor): Ground truth bboxes to be ignored, shape (num_ignored_gts, 4). img_meta (dict): Meta info of the image. gt_labels (Tensor): Ground truth labels of each box, shape (num_gts,). label_channels (int): Channel of label. unmap_outputs (bool): Whether to map outputs back to the original set of anchors. Returns: tuple: labels (Tensor): Labels of image, which shape is (h * w * num_anchors, ). label_weights (Tensor): Label weights of image, which shape is (h * w * num_anchors, ). pos_inds (Tensor): Pos index of image. neg_inds (Tensor): Neg index of image. sampling_result (obj:`SamplingResult`): Sampling result. pos_bbox_weights (Tensor): The Weight of using to calculate the bbox branch loss, which shape is (num, ). pos_predicted_boxes (Tensor): boxes predicted value of using to calculate the bbox branch loss, which shape is (num, 4). pos_target_boxes (Tensor): boxes target value of using to calculate the bbox branch loss, which shape is (num, 4). """ inside_flags = anchor_inside_flags(flat_anchors, valid_flags, img_meta['img_shape'][:2], self.train_cfg.allowed_border) if not inside_flags.any(): return (None, ) * 8 # assign gt and sample anchors anchors = flat_anchors[inside_flags, :] bbox_preds = bbox_preds.reshape(-1, 4) bbox_preds = bbox_preds[inside_flags, :] # decoded bbox decoder_bbox_preds = self.bbox_coder.decode(anchors, bbox_preds) assign_result = self.assigner.assign( decoder_bbox_preds, anchors, gt_bboxes, gt_bboxes_ignore, None if self.sampling else gt_labels) pos_bbox_weights = assign_result.get_extra_property('pos_idx') pos_predicted_boxes = assign_result.get_extra_property( 'pos_predicted_boxes') pos_target_boxes = assign_result.get_extra_property('target_boxes') sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes) num_valid_anchors = anchors.shape[0] labels = anchors.new_full((num_valid_anchors, ), self.num_classes, dtype=torch.long) label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: if gt_labels is None: # Only rpn gives gt_labels as None # Foreground is the first class since v2.5.0 labels[pos_inds] = 0 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if self.train_cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of anchors if unmap_outputs: num_total_anchors = flat_anchors.size(0) labels = unmap(labels, num_total_anchors, inside_flags, fill=self.num_classes) # fill bg label label_weights = unmap(label_weights, num_total_anchors, inside_flags) return (labels, label_weights, pos_inds, neg_inds, sampling_result, pos_bbox_weights, pos_predicted_boxes, pos_target_boxes)
def _ga_shape_target_single(self, flat_approxs, inside_flags, flat_squares, gt_bboxes, gt_bboxes_ignore, img_meta, unmap_outputs=True): """Compute guided anchoring targets. This function returns sampled anchors and gt bboxes directly rather than calculates regression targets. Args: flat_approxs (Tensor): flat approxs of a single image, shape (n, 4) inside_flags (Tensor): inside flags of a single image, shape (n, ). flat_squares (Tensor): flat squares of a single image, shape (approxs_per_octave * n, 4) gt_bboxes (Tensor): Ground truth bboxes of a single image. img_meta (dict): Meta info of a single image. approxs_per_octave (int): number of approxs per octave cfg (dict): RPN train configs. unmap_outputs (bool): unmap outputs or not. Returns: tuple """ if not inside_flags.any(): return (None, ) * 5 # assign gt and sample anchors expand_inside_flags = inside_flags[:, None].expand( -1, self.approxs_per_octave).reshape(-1) approxs = flat_approxs[expand_inside_flags, :] squares = flat_squares[inside_flags, :] assign_result = self.ga_assigner.assign(approxs, squares, self.approxs_per_octave, gt_bboxes, gt_bboxes_ignore) sampling_result = self.ga_sampler.sample(assign_result, squares, gt_bboxes) bbox_anchors = torch.zeros_like(squares) bbox_gts = torch.zeros_like(squares) bbox_weights = torch.zeros_like(squares) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: bbox_anchors[pos_inds, :] = sampling_result.pos_bboxes bbox_gts[pos_inds, :] = sampling_result.pos_gt_bboxes bbox_weights[pos_inds, :] = 1.0 # map up to original set of anchors if unmap_outputs: num_total_anchors = flat_squares.size(0) bbox_anchors = unmap(bbox_anchors, num_total_anchors, inside_flags) bbox_gts = unmap(bbox_gts, num_total_anchors, inside_flags) bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) return (bbox_anchors, bbox_gts, bbox_weights, pos_inds, neg_inds)
def _get_target_single(self, flat_approxs, inside_flags, flat_squares, gt_bboxes, gt_bboxes_ignore, gt_labels, img_meta, label_channels=None, sampling=True, unmap_outputs=True): """Compute regression and classification targets for anchors in a single image. Args: flat_approxs (Tensor): flat approxs of a single image, shape (n, 4) inside_flags (Tensor): inside flags of a single image, shape (n, ). flat_squares (Tensor): flat squares of a single image, shape (approxs_per_octave * n, 4) gt_bboxes (Tensor): Ground truth bboxes of a single image, \ shape (num_gts, 4). gt_bboxes_ignore (Tensor): Ground truth bboxes to be ignored, shape (num_ignored_gts, 4). gt_labels (Tensor): Ground truth labels of each box, shape (num_gts,). img_meta (dict): Meta info of the image. label_channels (int): Channel of label. sampling (bool): Sample Anchors or not. unmap_outputs (bool): unmap outputs or not. Returns: tuple: - labels_list (Tensor): Labels in a single image - label_weights (Tensor): Label weights in a single image - bbox_cls_targets (Tensor): BBox cls targets in a single image - bbox_cls_weights (Tensor): BBox cls weights in a single image - bbox_reg_targets (Tensor): BBox reg targets in a single image - bbox_reg_weights (Tensor): BBox reg weights in a single image - num_total_pos (int): Number of positive samples \ in a single image - num_total_neg (int): Number of negative samples \ in a single image """ if not inside_flags.any(): return (None, ) * 8 # assign gt and sample anchors expand_inside_flags = inside_flags[:, None].expand( -1, self.approxs_per_octave).reshape(-1) approxs = flat_approxs[expand_inside_flags, :] squares = flat_squares[inside_flags, :] assign_result = self.assigner.assign(approxs, squares, self.approxs_per_octave, gt_bboxes, gt_bboxes_ignore) sampling_result = self.sampler.sample(assign_result, squares, gt_bboxes) num_valid_squares = squares.shape[0] bbox_cls_targets = squares.new_zeros( (num_valid_squares, self.side_num * 4)) bbox_cls_weights = squares.new_zeros( (num_valid_squares, self.side_num * 4)) bbox_reg_targets = squares.new_zeros( (num_valid_squares, self.side_num * 4)) bbox_reg_weights = squares.new_zeros( (num_valid_squares, self.side_num * 4)) labels = squares.new_full((num_valid_squares, ), self.background_label, dtype=torch.long) label_weights = squares.new_zeros(num_valid_squares, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: (pos_bbox_reg_targets, pos_bbox_reg_weights, pos_bbox_cls_targets, pos_bbox_cls_weights) = self.bbox_coder.encode( sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes) bbox_cls_targets[pos_inds, :] = pos_bbox_cls_targets bbox_reg_targets[pos_inds, :] = pos_bbox_reg_targets bbox_cls_weights[pos_inds, :] = pos_bbox_cls_weights bbox_reg_weights[pos_inds, :] = pos_bbox_reg_weights if gt_labels is None: labels[pos_inds] = 1 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if self.train_cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of anchors if unmap_outputs: num_total_anchors = flat_squares.size(0) labels = unmap( labels, num_total_anchors, inside_flags, fill=self.background_label) label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_cls_targets = unmap(bbox_cls_targets, num_total_anchors, inside_flags) bbox_cls_weights = unmap(bbox_cls_weights, num_total_anchors, inside_flags) bbox_reg_targets = unmap(bbox_reg_targets, num_total_anchors, inside_flags) bbox_reg_weights = unmap(bbox_reg_weights, num_total_anchors, inside_flags) return (labels, label_weights, bbox_cls_targets, bbox_cls_weights, bbox_reg_targets, bbox_reg_weights, pos_inds, neg_inds)
def gfl_target_single(self, flat_anchors, valid_flags, num_level_anchors, gt_bboxes, gt_bboxes_ignore, gt_labels, img_meta, cfg, label_channels=1, unmap_outputs=True): inside_flags = anchor_inside_flags(flat_anchors, valid_flags, img_meta['img_shape'][:2], cfg.allowed_border) if not inside_flags.any(): return (None, ) * 6 # assign gt and sample anchors anchors = flat_anchors[inside_flags.type(torch.bool), :] num_level_anchors_inside = self.get_num_level_anchors_inside( num_level_anchors, inside_flags) bbox_assigner = build_assigner(cfg.assigner) assign_result = bbox_assigner.assign(anchors, num_level_anchors_inside, gt_bboxes, gt_bboxes_ignore, gt_labels) bbox_sampler = PseudoSampler() sampling_result = bbox_sampler.sample(assign_result, anchors, gt_bboxes) num_valid_anchors = anchors.shape[0] bbox_targets = torch.zeros_like(anchors) bbox_weights = torch.zeros_like(anchors) labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long) label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: pos_bbox_targets = sampling_result.pos_gt_bboxes bbox_targets[pos_inds, :] = pos_bbox_targets bbox_weights[pos_inds, :] = 1.0 if gt_labels is None: labels[pos_inds] = 1 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = cfg.pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of anchors if unmap_outputs: inside_flags = inside_flags.type(torch.bool) num_total_anchors = flat_anchors.size(0) anchors = unmap(anchors, num_total_anchors, inside_flags) labels = unmap(labels, num_total_anchors, inside_flags) label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) return (anchors, labels, label_weights, bbox_targets, bbox_weights, pos_inds, neg_inds)
def _get_target_single(self, cls_scores, bbox_preds, flat_anchors, valid_flags, gt_bboxes, gt_bboxes_ignore, gt_labels, img_meta, label_channels=1, unmap_outputs=True): """Compute regression, classification targets for anchors in a single image. Args: cls_scores (list(Tensor)): Box scores for each image. bbox_preds (list(Tensor)): Box energies / deltas for each image. flat_anchors (Tensor): Multi-level anchors of the image, which are concatenated into a single tensor of shape (num_anchors ,4) valid_flags (Tensor): Multi level valid flags of the image, which are concatenated into a single tensor of shape (num_anchors,). gt_bboxes (Tensor): Ground truth bboxes of the image, shape (num_gts, 4). gt_bboxes_ignore (Tensor): Ground truth bboxes to be ignored, shape (num_ignored_gts, 4). gt_labels (Tensor): Ground truth labels of each box, shape (num_gts,). img_meta (dict): Meta info of the image. label_channels (int): Channel of label. unmap_outputs (bool): Whether to map outputs back to the original set of anchors. Returns: tuple: N is the number of total anchors in the image. anchors (Tensor): All anchors in the image with shape (N, 4). labels (Tensor): Labels of all anchors in the image with shape (N,). label_weights (Tensor): Label weights of all anchor in the image with shape (N,). bbox_targets (Tensor): BBox targets of all anchors in the image with shape (N, 4). norm_alignment_metrics (Tensor): Normalized alignment metrics of all priors in the image with shape (N,). """ inside_flags = anchor_inside_flags(flat_anchors, valid_flags, img_meta['img_shape'][:2], self.train_cfg.allowed_border) if not inside_flags.any(): return (None, ) * 7 # assign gt and sample anchors anchors = flat_anchors[inside_flags, :] assign_result = self.alignment_assigner.assign( cls_scores[inside_flags, :], bbox_preds[inside_flags, :], anchors, gt_bboxes, gt_bboxes_ignore, gt_labels, self.alpha, self.beta) assign_ious = assign_result.max_overlaps assign_metrics = assign_result.assign_metrics sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes) num_valid_anchors = anchors.shape[0] bbox_targets = torch.zeros_like(anchors) labels = anchors.new_full((num_valid_anchors, ), self.num_classes, dtype=torch.long) label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) norm_alignment_metrics = anchors.new_zeros(num_valid_anchors, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: # point-based pos_bbox_targets = sampling_result.pos_gt_bboxes bbox_targets[pos_inds, :] = pos_bbox_targets if gt_labels is None: # Only rpn gives gt_labels as None # Foreground is the first class since v2.5.0 labels[pos_inds] = 0 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if self.train_cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 class_assigned_gt_inds = torch.unique( sampling_result.pos_assigned_gt_inds) for gt_inds in class_assigned_gt_inds: gt_class_inds = pos_inds[sampling_result.pos_assigned_gt_inds == gt_inds] pos_alignment_metrics = assign_metrics[gt_class_inds] pos_ious = assign_ious[gt_class_inds] pos_norm_alignment_metrics = pos_alignment_metrics / ( pos_alignment_metrics.max() + 10e-8) * pos_ious.max() norm_alignment_metrics[gt_class_inds] = pos_norm_alignment_metrics # map up to original set of anchors if unmap_outputs: num_total_anchors = flat_anchors.size(0) anchors = unmap(anchors, num_total_anchors, inside_flags) labels = unmap(labels, num_total_anchors, inside_flags, fill=self.num_classes) label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) norm_alignment_metrics = unmap(norm_alignment_metrics, num_total_anchors, inside_flags) return (anchors, labels, label_weights, bbox_targets, norm_alignment_metrics)
def _get_target_single(self, flat_anchors, valid_flags, num_level_anchors, gt_bboxes, gt_bboxes_ignore, gt_labels, img_meta, label_channels=1, unmap_outputs=True): inside_flags = anchor_inside_flags(flat_anchors, valid_flags, img_meta['img_shape'][:2], self.train_cfg.allowed_border) if not inside_flags.any(): return (None, ) * 6 # assign gt and sample anchors anchors = flat_anchors[inside_flags, :] num_level_anchors_inside = self.get_num_level_anchors_inside( num_level_anchors, inside_flags) assign_result = self.assigner.assign(anchors, num_level_anchors_inside, gt_bboxes, gt_bboxes_ignore, gt_labels) sampling_result = self.sampler.sample(assign_result, anchors, gt_bboxes) num_valid_anchors = anchors.shape[0] bbox_targets = torch.zeros_like(anchors) bbox_weights = torch.zeros_like(anchors) labels = anchors.new_full((num_valid_anchors, ), self.background_label, dtype=torch.long) label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds if len(pos_inds) > 0: pos_bbox_targets = self.bbox_coder.encode( sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes) bbox_targets[pos_inds, :] = pos_bbox_targets bbox_weights[pos_inds, :] = 1.0 if gt_labels is None: labels[pos_inds] = 1 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if self.train_cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of anchors if unmap_outputs: num_total_anchors = flat_anchors.size(0) anchors = unmap(anchors, num_total_anchors, inside_flags) labels = unmap(labels, num_total_anchors, inside_flags, fill=self.num_classes) label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) return (anchors, labels, label_weights, bbox_targets, bbox_weights, pos_inds, neg_inds)
def _get_targets_single(self, flat_anchors, valid_flags, gt_bboxes, gt_bboxes_ignore, gt_masks, gt_labels, img_meta, label_channels=1, unmap_outputs=True): """Compute regression and classification targets for anchors in a single image. Args: flat_anchors (Tensor): Multi-level anchors of the image, which are concatenated into a single tensor of shape (num_anchors ,4) valid_flags (Tensor): Multi level valid flags of the image, which are concatenated into a single tensor of shape (num_anchors,). gt_bboxes (Tensor): Ground truth bboxes of the image, shape (num_gts, 4). img_meta (dict): Meta info of the image. gt_bboxes_ignore (Tensor): Ground truth bboxes to be ignored, shape (num_ignored_gts, 4). img_meta (dict): Meta info of the image. gt_labels (Tensor): Ground truth labels of each box, shape (num_gts,). label_channels (int): Channel of label. unmap_outputs (bool): Whether to map outputs back to the original set of anchors. Returns: tuple: labels_list (list[Tensor]): Labels of each level label_weights_list (list[Tensor]): Label weights of each level bbox_targets_list (list[Tensor]): BBox targets of each level bbox_weights_list (list[Tensor]): BBox weights of each level num_total_pos (int): Number of positive samples in all images num_total_neg (int): Number of negative samples in all images """ ## 使用水平框的gt_bboxes来进行anchor的assign ## 使用gt_masks来生成gt_rbboxes,并进行target的计算。 inside_flags = anchor_inside_flags(flat_anchors, valid_flags, img_meta['img_shape'][:2], self.train_cfg.allowed_border) if not inside_flags.any(): return (None, ) * 7 # assign gt and sample anchors anchors = flat_anchors[inside_flags, :] assign_result = self.assigner.assign( anchors, gt_bboxes, gt_bboxes_ignore, None if self.sampling else gt_labels) # EXAMPLE # 28 pos , iou > 0.5 -> assign_result.gt_inds = 1 # 79 bad pos , 0.4< iou < 0.5 -> assign_result.gt_inds = -1 # others, iou < 0.4 -> assign_result.gt_inds = 0 ######################################################## # gt_masks = [gtm for gtm in gt_masks.masks] # if use_mod: # gt_rbboxes = cv2_mask2rbbox_mod(gt_masks) # else: gt_rbboxes = cv2_mask2rbbox_mod(gt_masks) gt_rbboxes = gt_bboxes.new_tensor(gt_rbboxes) gt_inclines = gt_rbboxes[:, -1] gt_rbboxes = gt_rbboxes[:, 0:-1] sampling_result = self.sampler.sample(assign_result, anchors, gt_rbboxes) ######################################################## num_valid_anchors = anchors.shape[0] bbox_targets = anchors.new_zeros((anchors.shape[0], 5)) bbox_weights = anchors.new_zeros((anchors.shape[0], 5)) labels = anchors.new_full((num_valid_anchors, ), self.num_classes, dtype=torch.long) label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds # inds是anchors的index,而非sampling_result.bboxex if len(pos_inds) > 0: if not self.reg_decoded_bbox: pos_bbox_targets = self.bbox_coder.encode( sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes) else: pos_bbox_targets = sampling_result.pos_gt_bboxes ############################################################# bbox_targets[pos_inds, 0:5] = pos_bbox_targets pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 bbox_targets[pos_inds, 5] = gt_inclines[pos_assigned_gt_inds] bbox_weights[pos_inds, :] = 1.0 ############################################################# if gt_labels is None: # Only rpn gives gt_labels as None # Foreground is the first class since v2.5.0 labels[pos_inds] = 0 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if self.train_cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of anchors if unmap_outputs: num_total_anchors = flat_anchors.size(0) labels = unmap(labels, num_total_anchors, inside_flags, fill=self.num_classes) # fill bg label label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) # if torch.sum(torch.isinf(bbox_targets).flatten()) > 0: # print(bbox_targets) if torch.sum(torch.isinf(bbox_targets)): raise AssertionError return (labels, label_weights, bbox_targets, bbox_weights, pos_inds, neg_inds, sampling_result)
def atss_target_single(self, flat_anchors, valid_flags, num_level_anchors, gt_bboxes, gt_masks, gt_bboxes_ignore, gt_labels, img_meta, cfg, target_means, target_stds, label_channels=1, unmap_outputs=True, with_module=False, hbb_trans='hbb2obb_v2'): inside_flags = anchor_inside_flags(flat_anchors, valid_flags, img_meta['img_shape'][:2], cfg.allowed_border) if not inside_flags.any(): return (None, ) * 6 # assign gt and sample anchors anchors = flat_anchors[inside_flags.type(torch.bool), :] num_level_anchors_inside = self.get_num_level_anchors_inside( num_level_anchors, inside_flags) bbox_assigner = build_assigner(cfg.assigner) assign_result = bbox_assigner.assign(anchors, num_level_anchors_inside, gt_bboxes, gt_bboxes_ignore, gt_labels) bbox_sampler = PseudoSampler() sampling_result = bbox_sampler.sample(assign_result, anchors, gt_bboxes) num_valid_anchors = anchors.shape[0] bbox_targets = torch.zeros_like(anchors) bbox_weights = torch.zeros_like(anchors) ## 自己加的 rbbox_targets = torch.zeros(num_valid_anchors, 5).to(anchors.device) rbbox_weights = torch.zeros(num_valid_anchors, 5).to(anchors.device) ## labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long) label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) pos_inds = sampling_result.pos_inds neg_inds = sampling_result.neg_inds ## 自己加的 pos_assigned_gt_inds = sampling_result.pos_assigned_gt_inds gt_obbs = gt_mask_bp_obbs(gt_masks, with_module) gt_obbs_ts = torch.from_numpy(gt_obbs).to( sampling_result.pos_bboxes.device) pos_gt_obbs_ts = gt_obbs_ts[pos_assigned_gt_inds] ## if len(pos_inds) > 0: ## 自己加的 pos_ext_rbboxes = hbb2obb_v2(sampling_result.pos_bboxes) if with_module: pos_rbbox_targets = dbbox2delta(pos_ext_rbboxes, pos_gt_obbs_ts, target_means, target_stds) else: pos_rbbox_targets = dbbox2delta_v3(pos_ext_rbboxes, pos_gt_obbs_ts, target_means, target_stds) ## pos_bbox_targets = bbox2delta(sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes, self.target_means[:4], self.target_stds[:4]) bbox_targets[pos_inds, :] = pos_bbox_targets ## 自己加的 rbbox_targets[pos_inds, :] = pos_rbbox_targets rbbox_weights[pos_inds, :] = 1.0 ## bbox_weights[pos_inds, :] = 1.0 if gt_labels is None: labels[pos_inds] = 1 else: labels[pos_inds] = gt_labels[ sampling_result.pos_assigned_gt_inds] if cfg.pos_weight <= 0: label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = cfg.pos_weight if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 # map up to original set of anchors if unmap_outputs: inside_flags = inside_flags.type(torch.bool) num_total_anchors = flat_anchors.size(0) anchors = unmap(anchors, num_total_anchors, inside_flags) labels = unmap(labels, num_total_anchors, inside_flags) label_weights = unmap(label_weights, num_total_anchors, inside_flags) bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) ## 自己加的 rbbox_targets = unmap(rbbox_targets, num_total_anchors, inside_flags) rbbox_weights = unmap(rbbox_weights, num_total_anchors, inside_flags) ## return (anchors, labels, label_weights, bbox_targets, bbox_weights, pos_inds, neg_inds, rbbox_targets, rbbox_weights)
def train_detector(model, dataset, cfg, distributed=False, validate=False, timestamp=None, meta=None): logger = get_root_logger(cfg.log_level) # prepare data loaders dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] if 'imgs_per_gpu' in cfg.data: logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. ' 'Please use "samples_per_gpu" instead') if 'samples_per_gpu' in cfg.data: logger.warning( f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and ' f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"' f'={cfg.data.imgs_per_gpu} is used in this experiments') else: logger.warning( 'Automatically set "samples_per_gpu"="imgs_per_gpu"=' f'{cfg.data.imgs_per_gpu} in this experiments') cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu data_loaders = [ build_dataloader( ds, cfg.data.samples_per_gpu, cfg.data.workers_per_gpu, # cfg.gpus will be ignored if distributed len(cfg.gpu_ids), dist=distributed, seed=cfg.seed) for ds in dataset ] # put model on gpus if distributed: find_unused_parameters = cfg.get('find_unused_parameters', False) # Sets the `find_unused_parameters` parameter in # torch.nn.parallel.DistributedDataParallel model = MMDistributedDataParallel( model.cuda(), device_ids=[torch.cuda.current_device()], broadcast_buffers=False, find_unused_parameters=find_unused_parameters) else: model = MMDataParallel( model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) # build runner optimizer = build_optimizer(model, cfg.optimizer) if 'runner' not in cfg: cfg.runner = { 'type': 'EpochBasedRunner', 'max_epochs': cfg.total_epochs } warnings.warn( 'config is now expected to have a `runner` section, ' 'please set `runner` in your config.', UserWarning) else: if 'total_epochs' in cfg: assert cfg.total_epochs == cfg.runner.max_epochs runner = build_runner( cfg.runner, default_args=dict( model=model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta)) # an ugly workaround to make .log and .log.json filenames the same runner.timestamp = timestamp # fp16 setting fp16_cfg = cfg.get('fp16', None) if fp16_cfg is not None: optimizer_config = Fp16OptimizerHook( **cfg.optimizer_config, **fp16_cfg, distributed=distributed) elif distributed and 'type' not in cfg.optimizer_config: optimizer_config = OptimizerHook(**cfg.optimizer_config) else: optimizer_config = cfg.optimizer_config # register hooks runner.register_training_hooks(cfg.lr_config, optimizer_config, cfg.checkpoint_config, cfg.log_config, cfg.get('momentum_config', None)) if distributed: if isinstance(runner, EpochBasedRunner): runner.register_hook(DistSamplerSeedHook()) # register eval hooks if validate: # Support batch_size > 1 in validation val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1) if val_samples_per_gpu > 1: # Replace 'ImageToTensor' to 'DefaultFormatBundle' cfg.data.val.pipeline = replace_ImageToTensor( cfg.data.val.pipeline) val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) val_dataloader = build_dataloader( val_dataset, samples_per_gpu=val_samples_per_gpu, workers_per_gpu=cfg.data.workers_per_gpu, dist=distributed, shuffle=False) eval_cfg = cfg.get('evaluation', {}) eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' eval_hook = DistEvalHook if distributed else EvalHook runner.register_hook(eval_hook(val_dataloader, **eval_cfg)) # user-defined hooks if cfg.get('custom_hooks', None): custom_hooks = cfg.custom_hooks assert isinstance(custom_hooks, list), \ f'custom_hooks expect list type, but got {type(custom_hooks)}' for hook_cfg in cfg.custom_hooks: assert isinstance(hook_cfg, dict), \ 'Each item in custom_hooks expects dict type, but got ' \ f'{type(hook_cfg)}' hook_cfg = hook_cfg.copy() priority = hook_cfg.pop('priority', 'NORMAL') hook = build_from_cfg(hook_cfg, HOOKS) runner.register_hook(hook, priority=priority) if cfg.resume_from: runner.resume(cfg.resume_from) elif cfg.load_from: runner.load_checkpoint(cfg.load_from) # runner.run(data_loaders, cfg.workflow) anchor_generator = build_anchor_generator(cfg.model.rpn_head.anchor_generator) assigner = build_assigner(cfg.model.train_cfg.rpn.assigner) total_num_targets = torch.tensor([0] * 5) for iteration, data in enumerate(data_loaders): for i in data: # print(i.keys()) img_metas = i['img_metas']._data # print(img_metas) num_imgs = len(img_metas) images = i['img']._data gt_bboxes = i['gt_bboxes']._data h, w = images[0].size()[-2:] features_shape = [] for i in range(2, 7): f_shape = [int(h/(2**i)), int(w/(2**i))] features_shape.append(f_shape) multi_level_anchors = anchor_generator.grid_anchors( features_shape) anchor_list = [multi_level_anchors for _ in range(num_imgs)] # for each image, we compute valid flags of multi level anchors valid_flag_list = [] for img_id, img_meta in enumerate(img_metas): multi_level_flags = anchor_generator.valid_flags( features_shape, img_meta[0]['pad_shape']) valid_flag_list.append(multi_level_flags) # print(anchor_list, valid_flag_list) assert len(anchor_list) == len(valid_flag_list) == num_imgs # anchor number of multi levels num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] # concat all level anchors to a single tensor concat_anchor_list = [] concat_valid_flag_list = [] for i in range(num_imgs): assert len(anchor_list[i]) == len(valid_flag_list[i]) concat_anchor_list.append(torch.cat(anchor_list[i])) concat_valid_flag_list.append(torch.cat(valid_flag_list[i])) gt_bboxes_ignore_list= None # compute targets for each image if gt_bboxes_ignore_list is None: gt_bboxes_ignore_list = [None for _ in range(num_imgs)] inside_flags = anchor_inside_flags(concat_anchor_list[0], concat_valid_flag_list[0], img_metas[0][0]['img_shape'][:2], 0) if not inside_flags.any(): return (None, ) * 7 # assign gt and sample anchors anchors = concat_anchor_list[0][inside_flags, :] assign_result = assigner.assign( anchors.cpu(), gt_bboxes[0][0], gt_bboxes_ignore_list[0], None) print(assign_result.pos_gt_bboxes) pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) labels = anchors.new_full((anchors.shape[0], ), -1, dtype=torch.long) labels[pos_inds] = 1 num_total_anchors = concat_anchor_list[0].size(0) labels = unmap( labels, num_total_anchors, inside_flags, fill=-1) # fill bg label match_results = images_to_levels([labels], num_level_anchors) # print(match_results) for idx, match_result in enumerate(match_results): num = torch.where(match_result==1)[0].numel() total_num_targets[idx] += num # print(total_num_targets) print(total_num_targets) print(total_num_targets)