def __call__(self, bbox, anchor, img_size):
    """Assign ground-truth regression targets and labels to anchors.

    Only anchors lying completely inside the image take part in target
    creation; afterwards the results are scattered back over the full
    anchor set, with out-of-image anchors marked as ignored.

    Types of input arrays and output arrays are the same.

    Here are notations.

    * :math:`S` is the number of anchors.
    * :math:`R` is the number of bounding boxes.

    Args:
        bbox (array): Coordinates of ground truth bounding boxes.
            Its shape is :math:`(R, 4)`.
        anchor (array): Coordinates of anchors. Its shape is
            :math:`(S, 4)`.
        img_size (tuple of ints): A tuple :obj:`H, W`, the height and
            width of the image.

    Returns:
        (array, array):

        * **loc**: Offsets and scales (note: scales, not only offsets)
          matching the anchors to the ground truth boxes. Its shape is
          :math:`(S, 4)`.
        * **label**: Labels of anchors with values
          :obj:`(1=positive, 0=negative, -1=ignore)`. Its shape is
          :math:`(S,)`.
    """
    img_H, img_W = img_size
    num_anchors = len(anchor)

    # Keep only anchors fully contained in the image.
    idx_inside = _get_inside_index(anchor, img_H, img_W)
    inside_anchor = anchor[idx_inside]
    matched_gt, label = self._create_label(idx_inside, inside_anchor, bbox)

    # Regression targets toward each inside anchor's best-matching
    # ground truth box.
    loc = bbox2loc(inside_anchor, bbox[matched_gt])

    # Map results back onto the original anchor set: anchors outside
    # the image get label -1 (ignore) and zeroed loc targets.
    label = _unmap(label, num_anchors, idx_inside, fill=-1)
    loc = _unmap(loc, num_anchors, idx_inside, fill=0)
    return loc, label
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Assigns ground truth to sampled proposals.

    This function samples a total of :obj:`self.n_sample` RoIs from the
    combination of :obj:`roi` and :obj:`bbox`. The sampled RoIs receive
    ground truth class labels as well as bounding box offsets and
    scales matching them to the ground truth boxes. At most
    :obj:`pos_ratio * self.n_sample` RoIs are sampled as foregrounds.

    Offsets and scales of bounding boxes are calculated using
    :func:`model.utils.bbox_tools.bbox2loc`.
    Also, types of input arrays and output arrays are the same.

    Here are notations.

    * :math:`S` is the total number of sampled RoIs, which equals \
        :obj:`self.n_sample`.
    * :math:`L` is number of object classes possibly including the \
        background.

    Args:
        roi (array): Region of Interests (RoIs) from which we sample.
            Its shape is :math:`(R, 4)`
        bbox (array): The coordinates of ground truth bounding boxes.
            Its shape is :math:`(R', 4)`.
        label (array): Ground truth bounding box labels. Its shape
            is :math:`(R',)`. Its range is :math:`[0, L - 1]`, where
            :math:`L` is the number of foreground classes.
        loc_normalize_mean (tuple of four floats): Mean values used to
            normalize the regression targets.
        loc_normalize_std (tuple of four floats): Standard deviation
            used to normalize the regression targets.

    Returns:
        (array, array, array):

        * **sample_roi**: Regions of interests that are sampled. \
            Its shape is :math:`(S, 4)`.
        * **gt_roi_loc**: Offsets and scales to match the sampled RoIs \
            to the ground truth bounding boxes. Its shape is \
            :math:`(S, 4)`.
        * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape \
            is :math:`(S,)`. Its range is :math:`[0, L]`. The label \
            with value 0 is the background.
    """
    n_bbox, _ = bbox.shape

    # The ground truth boxes themselves also act as candidate RoIs.
    roi = np.concatenate((roi, bbox), axis=0)

    fg_quota = np.round(self.n_sample * self.pos_ratio)
    iou = bbox_iou(roi, bbox)
    # For each candidate: the gt box it overlaps most, and that IoU.
    gt_assignment = iou.argmax(axis=1)
    max_iou = iou.max(axis=1)
    # Shift class ids from [0, n_fg_class - 1] to [1, n_fg_class];
    # label 0 is reserved for the background.
    gt_roi_label = label[gt_assignment] + 1

    # Foregrounds: candidates whose best IoU clears pos_iou_thresh.
    fg_index = np.where(max_iou >= self.pos_iou_thresh)[0]
    n_fg = int(min(fg_quota, fg_index.size))
    if fg_index.size > 0:
        fg_index = np.random.choice(fg_index, size=n_fg, replace=False)

    # Backgrounds: best IoU in [neg_iou_thresh_lo, neg_iou_thresh_hi).
    bg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                        & (max_iou >= self.neg_iou_thresh_lo))[0]
    n_bg = int(min(self.n_sample - n_fg, bg_index.size))
    if bg_index.size > 0:
        bg_index = np.random.choice(bg_index, size=n_bg, replace=False)

    # Foregrounds first, backgrounds after; backgrounds become class 0.
    keep_index = np.append(fg_index, bg_index)
    gt_roi_label = gt_roi_label[keep_index]
    gt_roi_label[n_fg:] = 0
    sample_roi = roi[keep_index]

    # Regression targets toward each sample's matched gt box, then
    # normalized by the supplied mean and std.
    gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
    mean = np.array(loc_normalize_mean, np.float32)
    std = np.array(loc_normalize_std, np.float32)
    gt_roi_loc = (gt_roi_loc - mean) / std

    return sample_roi, gt_roi_loc, gt_roi_label
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Assigns ground truth to sampled proposals.

    This function samples total of :obj:`self.n_sample` RoIs
    from the combination of :obj:`roi` and :obj:`bbox`.
    The RoIs are assigned with the ground truth class labels as well as
    bounding box offsets and scales to match the ground truth bounding
    boxes. As many as :obj:`pos_ratio * self.n_sample` RoIs are
    sampled as foregrounds.

    Offsets and scales of bounding boxes are calculated using
    :func:`model.utils.bbox_tools.bbox2loc`.
    Also, types of input arrays and output arrays are same.

    Here are notations.

    * :math:`S` is the total number of sampled RoIs, which equals \
        :obj:`self.n_sample`.
    * :math:`L` is number of object classes possibly including the \
        background.

    Args:
        roi (array): Region of Interests (RoIs) from which we sample.
            Its shape is :math:`(R, 4)`
        bbox (array): The coordinates of ground truth bounding boxes.
            Its shape is :math:`(R', 4)`.
        label (array): Ground truth bounding box labels. Its shape
            is :math:`(R',)`. Its range is :math:`[0, L - 1]`, where
            :math:`L` is the number of foreground classes.
        loc_normalize_mean (tuple of four floats): Mean values to normalize
            coordinates of bounding boxes.
        loc_normalize_std (tuple of four floats): Standard deviation of
            the coordinates of bounding boxes.

    Returns:
        (array, array, array):

        * **sample_roi**: Regions of interests that are sampled. \
            Its shape is :math:`(S, 4)`.
        * **gt_roi_loc**: Offsets and scales to match \
            the sampled RoIs to the ground truth bounding boxes. \
            Its shape is :math:`(S, 4)`.
        * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is \
            :math:`(S,)`. Its range is :math:`[0, L]`. The label with \
            value 0 is the background.
    """
    n_bbox, _ = bbox.shape

    # Ground truth boxes are appended so they can be sampled as
    # (perfect-overlap) positives as well.
    roi = np.concatenate((roi, bbox), axis=0)

    pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
    iou = bbox_iou(roi, bbox)
    # For each RoI: index of its best-overlapping gt box and that IoU.
    gt_assignment = iou.argmax(axis=1)
    max_iou = iou.max(axis=1)
    # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
    # The label with value 0 is the background.
    gt_roi_label = label[gt_assignment] + 1

    # Select foreground RoIs as those with >= pos_iou_thresh IoU.
    pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
    pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
    if pos_index.size > 0:
        pos_index = np.random.choice(pos_index,
                                     size=pos_roi_per_this_image,
                                     replace=False)

    # Select background RoIs as those within
    # [neg_iou_thresh_lo, neg_iou_thresh_hi).
    # BUG FIX: the lower bound must be inclusive (>=). The previous
    # strict '>' contradicted the interval documented above, and with
    # the common setting neg_iou_thresh_lo == 0.0 it excluded zero-IoU
    # (pure background) RoIs from negative sampling entirely.
    neg_index = np.where((max_iou < self.neg_iou_thresh_hi) &
                         (max_iou >= self.neg_iou_thresh_lo))[0]
    neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
    neg_roi_per_this_image = int(
        min(neg_roi_per_this_image, neg_index.size))
    if neg_index.size > 0:
        neg_index = np.random.choice(neg_index,
                                     size=neg_roi_per_this_image,
                                     replace=False)

    # The indices that we're selecting (both positive and negative).
    # Positives come first; everything after them is relabelled 0.
    keep_index = np.append(pos_index, neg_index)
    gt_roi_label = gt_roi_label[keep_index]
    gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
    sample_roi = roi[keep_index]

    # Compute offsets and scales to match sampled RoIs to the GTs,
    # normalized by the supplied mean and std.
    gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
    gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)) /
                  np.array(loc_normalize_std, np.float32))

    return sample_roi, gt_roi_loc, gt_roi_label
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Assigns ground truth to sampled proposals.

    This function samples total of :obj:`self.n_sample` RoIs
    from the combination of :obj:`roi` and :obj:`bbox`.
    The RoIs are assigned with the ground truth class labels as well as
    bounding box offsets and scales to match the ground truth bounding
    boxes. As many as :obj:`pos_ratio * self.n_sample` RoIs are
    sampled as foregrounds.

    Offsets and scales of bounding boxes are calculated using
    :func:`model.utils.bbox_tools.bbox2loc`.
    Also, types of input arrays and output arrays are same.

    Here are notations.

    * :math:`S` is the total number of sampled RoIs, which equals \
        :obj:`self.n_sample`.
    * :math:`L` is number of object classes possibly including the \
        background.

    Args:
        roi (array): Region of Interests (RoIs) from which we sample
            (the boxes currently selected). Its shape is :math:`(R, 4)`
        bbox (array): The coordinates of ground truth bounding boxes.
            Its shape is :math:`(R', 4)`.
        label (array): Ground truth bounding box labels. Its shape
            is :math:`(R',)`. Its range is :math:`[0, L - 1]`, where
            :math:`L` is the number of foreground classes.
        loc_normalize_mean (tuple of four floats): Mean values to normalize
            coordinates of bounding boxes.
        loc_normalize_std (tuple of four floats): Standard deviation of
            the coordinates of bounding boxes.

    Returns:
        (array, array, array):

        * **sample_roi**: Regions of interests that are sampled. \
            Its shape is :math:`(S, 4)`.
        * **gt_roi_loc**: Offsets and scales to match \
            the sampled RoIs to the ground truth bounding boxes. \
            Its shape is :math:`(S, 4)`.
        * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is \
            :math:`(S,)`. Its range is :math:`[0, L]`. The label with \
            value 0 is the background.
    """
    n_bbox, _ = bbox.shape

    # roi = np.concatenate((roi, bbox), axis=0)  # (R + R') * 4
    # (disabled) the concatenation works in original-image coordinates,
    # presumably to introduce a few more positive samples.
    # NOTE(review): other variants of this function do concatenate the
    # gt boxes here -- confirm that disabling it is intentional.
    roi = roi
    # Positive quota: number of samples times the foreground ratio.
    pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
    iou = bbox_iou(roi, bbox)  # shape (R, R')
    # argmax of each row: for every candidate box, the index of the
    # gt box with the highest IoU.
    gt_assignment = iou.argmax(axis=1)
    # max of each row: the highest IoU each candidate box achieves.
    max_iou = iou.max(axis=1)
    # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
    # The label with value 0 is the background.
    # Look up the label of the matched gt box; dataset annotations are
    # in [0, n_class - 1] and are shifted to [1, n_class] inside the
    # network, with background set to 0.
    gt_roi_label = label[gt_assignment] + 1

    # Select foreground RoIs as those with >= pos_iou_thresh IoU.
    pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
    # If many boxes exceed the threshold, cap the positives at the quota.
    pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
    if pos_index.size > 0:
        pos_index = np.random.choice(
            pos_index, size=pos_roi_per_this_image, replace=False)

    # Select background RoIs as those within
    # [neg_iou_thresh_lo, neg_iou_thresh_hi).
    neg_index = np.where((max_iou < self.neg_iou_thresh_hi) &
                         (max_iou >= self.neg_iou_thresh_lo))[0]
    neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
    # Number of negative boxes to actually select.
    neg_roi_per_this_image = int(min(neg_roi_per_this_image,
                                     neg_index.size))
    if neg_index.size > 0:
        neg_index = np.random.choice(
            neg_index, size=neg_roi_per_this_image, replace=False)

    # S = pos_roi_per_this_image + neg_roi_per_this_image <= 128
    # The indices that we're selecting (both positive and negative).
    keep_index = np.append(pos_index, neg_index)
    gt_roi_label = gt_roi_label[keep_index]  # (R, ) => (S, )
    gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
    sample_roi = roi[keep_index]

    # Compute offsets and scales to match sampled RoIs to the GTs.
    gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
    gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)
                   ) / np.array(loc_normalize_std, np.float32))

    return sample_roi, gt_roi_loc, gt_roi_label
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Assigns ground truth to sampled proposals.

    This function samples total of :obj:`self.n_sample` RoIs
    from the combination of :obj:`roi` and :obj:`bbox`.
    The RoIs are assigned with the ground truth class labels as well as
    bounding box offsets and scales to match the ground truth bounding
    boxes. As many as :obj:`pos_ratio * self.n_sample` RoIs are
    sampled as foregrounds.

    Offsets and scales of bounding boxes are calculated using
    :func:`model.utils.bbox_tools.bbox2loc`.
    Also, types of input arrays and output arrays are same.

    Here are notations.

    * :math:`S` is the total number of sampled RoIs, which equals \
        :obj:`self.n_sample`.
    * :math:`L` is number of object classes possibly including the \
        background.

    Args:
        roi (array): Region of Interests (RoIs) from which we sample.
            Its shape is :math:`(R, 4)`
        bbox (array): The coordinates of ground truth bounding boxes.
            Its shape is :math:`(R', 4)`.
        label (array): Ground truth bounding box labels. Its shape
            is :math:`(R',)`. Its range is :math:`[0, L - 1]`, where
            :math:`L` is the number of foreground classes.
        loc_normalize_mean (tuple of four floats): Mean values to normalize
            coordinates of bounding boxes.
        loc_normalize_std (tuple of four floats): Standard deviation of
            the coordinates of bounding boxes.

    Returns:
        (array, array, array):

        * **sample_roi**: Regions of interests that are sampled. \
            Its shape is :math:`(S, 4)`.
        * **gt_roi_loc**: Offsets and scales to match \
            the sampled RoIs to the ground truth bounding boxes. \
            Its shape is :math:`(S, 4)`.
        * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is \
            :math:`(S,)`. Its range is :math:`[0, L]`. The label with \
            value 0 is the background.
    """
    n_bbox, _ = bbox.shape

    # roi holds the candidate regions produced by the RPN and bbox holds
    # the ground truth boxes. The gt boxes can also serve as training
    # samples, so roi and bbox are concatenated here.
    roi = np.concatenate((roi, bbox), axis=0)

    # Number of positive samples to draw for this image.
    pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
    # IoU between every RoI and every gt box.
    iou = bbox_iou(roi, bbox)
    # For each RoI, the index of the gt box with the highest IoU ...
    gt_assignment = iou.argmax(axis=1)
    # ... and the value of that highest IoU.
    max_iou = iou.max(axis=1)
    # Label of each RoI; 0 is the background class, so all other class
    # ids are shifted up by 1.
    gt_roi_label = label[gt_assignment] + 1
    # At this point we have the max IoU and the class label of every RoI
    # (including the appended gt boxes). When assigning class labels we
    # do not yet distinguish how close each RoI is to its gt box.

    # Select foreground RoIs as those with >= pos_iou_thresh IoU.
    # NOTE(original author): why is the positive ratio set so low
    # (only 0.25)?
    pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
    pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
    if pos_index.size > 0:
        pos_index = np.random.choice(pos_index,
                                     size=pos_roi_per_this_image,
                                     replace=False)

    # Select background RoIs as those within
    # [neg_iou_thresh_lo, neg_iou_thresh_hi).
    # NOTE(original author): the IoU band here is [0, 0.5), and 0.5
    # seems fairly high for a negative upper bound.
    neg_index = np.where((max_iou < self.neg_iou_thresh_hi) &
                         (max_iou >= self.neg_iou_thresh_lo))[0]
    neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
    neg_roi_per_this_image = int(
        min(neg_roi_per_this_image, neg_index.size))
    if neg_index.size > 0:
        neg_index = np.random.choice(neg_index,
                                     size=neg_roi_per_this_image,
                                     replace=False)

    # The indices that we're selecting (both positive and negative).
    # Positives keep their class label; negatives are relabelled 0.
    keep_index = np.append(pos_index, neg_index)
    gt_roi_label = gt_roi_label[keep_index]
    gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
    sample_roi = roi[keep_index]

    # Compute offsets and scales to match sampled RoIs to the GTs.
    # The 4 correction terms are the ground truth for the localization
    # regression head.
    gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
    gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)) /
                  np.array(loc_normalize_std, np.float32))
    # NOTE(original author): nothing guarantees exactly n_sample (128)
    # RoIs are returned -- in extreme cases many RoIs qualify neither as
    # positive nor negative. In practice there are usually plenty of
    # candidates, so the quota is generally filled.
    return sample_roi, gt_roi_loc, gt_roi_label
def __call__(self, roi, bbox, label,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Sample RoIs and assign ground truth targets to them.

    Args:
        roi (array): RoIs produced by the RPN's ProposalCreator,
            shape :math:`(R, 4)`.
        bbox (array): Ground truth bounding boxes, shape :math:`(R', 4)`.
        label (array): Ground truth box labels in :math:`[0, L - 1]`
            where :math:`L` is the number of classes, shape :math:`(R',)`.
        loc_normalize_mean (tuple of four floats): Mean used to
            normalize the regression targets.
        loc_normalize_std (tuple of four floats): Standard deviation
            used to normalize the regression targets.

    Returns:
        (array, array, array):

        * **sample_roi**: The :obj:`n_sample` re-sampled RoIs,
          shape :math:`(S, 4)`.
        * **gt_roi_loc**: Normalized offsets/scales from each sampled
          RoI to its closest-matching gt box, shape :math:`(S, 4)`.
        * **gt_roi_label**: Class of the matched gt box per sample, in
          :math:`[0, L]` with 0 meaning background, shape :math:`(S,)`.
    """
    n_bbox, _ = bbox.shape

    # Append the gt boxes themselves as extra (perfect-overlap)
    # candidates.
    roi = np.concatenate((roi, bbox), axis=0)

    max_pos = np.round(self.n_sample * self.pos_ratio)

    # IoU matrix: entry (i, j) is the overlap of candidate i with gt j.
    # Each candidate is matched to the gt box it overlaps most; since
    # best_gt indexes rows of `label`, label[best_gt] is well defined.
    iou = bbox_iou(roi, bbox)
    best_gt = iou.argmax(axis=1)
    best_iou = iou.max(axis=1)
    # Class ids shifted up by one; 0 is reserved for background.
    gt_roi_label = label[best_gt] + 1

    # Keep candidates above the foreground IoU threshold and randomly
    # draw at most max_pos of them.
    positives = np.where(best_iou >= self.pos_iou_thresh)[0]
    n_pos = int(min(max_pos, positives.size))
    if positives.size > 0:
        positives = np.random.choice(positives, size=n_pos, replace=False)

    # Candidates whose best IoU falls inside the negative band
    # [neg_iou_thresh_lo, neg_iou_thresh_hi) fill the remaining quota.
    in_band = ((best_iou < self.neg_iou_thresh_hi)
               & (best_iou >= self.neg_iou_thresh_lo))
    negatives = np.where(in_band)[0]
    n_neg = int(min(self.n_sample - n_pos, negatives.size))
    if negatives.size > 0:
        negatives = np.random.choice(negatives, size=n_neg, replace=False)

    # Positives come first; everything after them is relabelled as
    # background (class 0).
    keep_index = np.append(positives, negatives)
    gt_roi_label = gt_roi_label[keep_index]
    gt_roi_label[n_pos:] = 0
    sample_roi = roi[keep_index]

    # Normalized regression targets toward each sample's matched gt box.
    gt_roi_loc = bbox2loc(sample_roi, bbox[best_gt[keep_index]])
    gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32))
                  / np.array(loc_normalize_std, np.float32))

    return sample_roi, gt_roi_loc, gt_roi_label