def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride = [16,], anchor_scales = [16,]): """ Assign anchors to ground-truth targets. Produces anchor classification labels and bounding-box regression targets. Parameters ---------- rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class] gt_ishard: (G, 1), 1 or 0 indicates difficult or not dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0 im_info: a list of [image_height, image_width, scale_ratios] _feat_stride: the downsampling ratio of feature map to the original input image anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) ---------- Returns ---------- rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes(may contains some transform) that are the regression objectives rpn_bbox_inside_weights: (HxWxA, 4) weights of each boxes, mainly accepts hyper param in cfg rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg, beacuse the numbers of bgs and fgs mays significiantly different """ _anchors = generate_anchors(scales=np.array(anchor_scales))#生成基本的anchor,一共9个 _num_anchors = _anchors.shape[0]#9个anchor if DEBUG: print('anchors:') print(_anchors) print('anchor shapes:') print(np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) _fg_sum = 0 _bg_sum = 0 _count = 0 # allow boxes to sit over the edge by a small amount _allowed_border = 0 # map of shape (..., H, W) #height, width = rpn_cls_score.shape[1:3] im_info = im_info[0]#图像的高宽及通道数 #在feature-map上定位anchor,并加上delta,得到在实际图像中anchor的真实坐标 # Algorithm: # for each (H, W) location i # generate 9 anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the 9 anchors # filter out-of-image anchors # measure GT overlap assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3]#feature-map的高宽 if DEBUG: print('AnchorTargetLayer: height', height, 'width', width) print('') print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) print('height, width: ({}, {})'.format(height, width)) print('rpn: gt_boxes.shape', gt_boxes.shape) print('rpn: gt_boxes', gt_boxes) # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride shift_y = np.arange(0, height) * _feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) # in W H order # K is H x W shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()#生成feature-map和真实image上anchor之间的偏移量 # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors#9个anchor K = shifts.shape[0]#50*37,feature-map的宽乘高的大小 all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))#相当于复制宽高的维度,然后相加 all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image #仅保留那些还在图像内部的anchor,超出图像的都删掉 inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :]#保留那些在图像内的anchor if DEBUG: print('anchors.shape', anchors.shape) #至此,anchor准备好了 #-------------------------------------------------------------- # label: 1 is positive, 0 is negative, -1 is dont care # (A) labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1)#初始化label,均为-1 # overlaps between the anchors and the gt boxes # overlaps (ex, gt), shape is A x G #计算anchor和gt-box的overlap,用来给anchor上标签 overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float))#假设anchors有x个,gt_boxes有y个,返回的是一个(x,y)的数组 # 存放每一个anchor和每一个gtbox之间的overlap argmax_overlaps = overlaps.argmax(axis=1) # (A)#找到和每一个gtbox,overlap最大的那个anchor max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) # G#找到每个位置上9个anchor中与gtbox,overlap最大的那个 gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0#先给背景上标签,小于0.3overlap的 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1#每个位置上的9个anchor中overlap最大的认为是前景 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1#overlap大于0.7的认为是前景 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # preclude dontcare areas if dontcare_areas is not None and dontcare_areas.shape[0] > 0:#这里我们暂时不考虑有doncare_area的存在 # intersec shape is D x A intersecs = bbox_intersections( np.ascontiguousarray(dontcare_areas, dtype=np.float), # D x 4 np.ascontiguousarray(anchors, dtype=np.float) # A x 4 ) intersecs_ = intersecs.sum(axis=0) # A x 1 labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1 #这里我们暂时不考虑难样本的问题 # preclude hard samples that are highly occlusioned, truncated or difficult to see if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0: assert gt_ishard.shape[0] == gt_boxes.shape[0] gt_ishard = gt_ishard.astype(int) gt_hardboxes = gt_boxes[gt_ishard == 1, :] if gt_hardboxes.shape[0] > 0: # H x A hard_overlaps = bbox_overlaps( np.ascontiguousarray(gt_hardboxes, dtype=np.float), # H x 4 np.ascontiguousarray(anchors, dtype=np.float)) # A x 4 hard_max_overlaps = hard_overlaps.max(axis=0) # (A) labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1 max_intersec_label_inds = hard_overlaps.argmax(axis=1) # H x 1 labels[max_intersec_label_inds] = -1 # # subsample positive labels if we have too many #对正样本进行采样,如果正样本的数量太多的话 # 限制正样本的数量不超过128个 #TODO 这个后期可能还需要修改,毕竟如果使用的是字符的片段,那个正样本的数量是很多的。 num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice( fg_inds, size=(len(fg_inds) - num_fg), replace=False)#随机去除掉一些正样本 labels[disable_inds] = -1#变为-1 # subsample negative labels if we have too many #对负样本进行采样,如果负样本的数量太多的话 # 正负样本总数是256,限制正样本数目最多128, # 如果正样本数量小于128,差的那些就用负样本补上,凑齐256个样本 num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice( bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 #print "was %s inds, disabling %s, now %s inds" % ( #len(bg_inds), len(disable_inds), np.sum(labels == 0)) # 至此, 上好标签,开始计算rpn-box的真值 #-------------------------------------------------------------- bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])#根据anchor和gtbox计算得真值(anchor和gtbox之间的偏差) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)#内部权重,前景就给1,其他是0 bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:#暂时使用uniform 权重,也就是正样本是1,负样本是0 # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) + 1 # positive_weights = np.ones((1, 4)) * 1.0 / num_examples # negative_weights = np.ones((1, 4)) * 1.0 / num_examples positive_weights = np.ones((1, 4)) negative_weights = np.zeros((1, 4)) else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / (np.sum(labels == 1)) + 1) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / (np.sum(labels == 0)) + 1) bbox_outside_weights[labels == 1, :] = positive_weights#外部权重,前景是1,背景是0 bbox_outside_weights[labels == 0, :] = negative_weights if DEBUG: _sums += bbox_targets[labels == 1, :].sum(axis=0) _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) _counts += np.sum(labels == 1) means = _sums / _counts stds = np.sqrt(_squared_sums / _counts - means ** 2) print('means:') print(means) print('stdevs:') print(stds) # map up to original set of anchors # 一开始是将超出图像范围的anchor直接丢掉的,现在在加回来 labels = _unmap(labels, total_anchors, inds_inside, fill=-1)#这些anchor的label是-1,也即dontcare bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)#这些anchor的真值是0,也即没有值 bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)#内部权重以0填充 bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)#外部权重以0填充 if DEBUG: print('rpn: max max_overlap', np.max(max_overlaps)) print('rpn: num_positive', np.sum(labels == 1)) print('rpn: num_negative', np.sum(labels == 0)) _fg_sum += np.sum(labels == 1) _bg_sum += np.sum(labels == 0) _count += 1 print('rpn: num_positive avg', _fg_sum / _count) print('rpn: num_negative avg', _bg_sum / _count) # labels labels = labels.reshape((1, height, width, A))#reshap一下label rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4))#reshape rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride=[ 16, ], anchor_scales=[ 16, ]): """ Assign anchors to ground-truth targets. Produces anchor classification labels and bounding-box regression targets. Parameters ---------- rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class] gt_ishard: (G, 1), 1 or 0 indicates difficult or not dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0 im_info: a list of [image_height, image_width, scale_ratios] _feat_stride: the downsampling ratio of feature map to the original input image anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) ---------- Returns ---------- rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes(may contains some transform) that are the regression objectives rpn_bbox_inside_weights: (HxWxA, 4) weights of each boxes, mainly accepts hyper param in cfg rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg, beacuse the numbers of bgs and fgs mays significiantly different """ _anchors = generate_anchors( scales=np.array(anchor_scales)) #生成基本的anchor,一共9个 _num_anchors = _anchors.shape[0] #9个anchor if DEBUG: print('anchors:') print(_anchors) print('anchor shapes:') print( np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) _fg_sum = 0 _bg_sum = 0 _count = 0 # allow boxes to sit over the edge by a small amount _allowed_border = 0 # map of shape (..., H, W) #height, width = rpn_cls_score.shape[1:3] im_info = im_info[0] #图像的高宽及通道数 #在feature-map上定位anchor,并加上delta,得到在实际图像中anchor的真实坐标 # Algorithm: # for each (H, W) location i # generate 9 anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the 9 anchors # filter out-of-image anchors # measure GT overlap assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3] #feature-map的高宽 if DEBUG: print('AnchorTargetLayer: height', height, 'width', width) print('') print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) print('height, width: ({}, {})'.format(height, width)) print('rpn: gt_boxes.shape', gt_boxes.shape) print('rpn: gt_boxes', gt_boxes) # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride # (W) shift_y = np.arange(0, height) * _feat_stride #(H) shift_x, shift_y = np.meshgrid( shift_x, shift_y) # in W H order # shift_x (H, W) shift_y (H, W) # K is H x W shifts = np.vstack( (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel() )).transpose() #生成feature-map和真实image上anchor之间的偏移量 #(H*W, 4) # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors #9个anchor K = shifts.shape[0] #50*37,feature-map的宽乘高的大小 all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) #相当于复制宽高的维度,然后相加 all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image #仅保留那些还在图像内部的anchor,超出图像的都删掉 inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] #保留那些在图像内的anchor (In, 4) if DEBUG: print('anchors.shape', anchors.shape) #至此,anchor准备好了 #-------------------------------------------------------------- # label: 1 is positive, 0 is negative, -1 is dont care # (A) labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) #初始化label,均为-1 # overlaps between the anchors and the gt boxes # overlaps (ex, gt), shape is A x G #计算anchor和gt-box的overlap,用来给anchor上标签 overlaps = bbox_overlaps(np.ascontiguousarray( anchors, dtype=np.float), np.ascontiguousarray( gt_boxes, dtype=np.float)) #假设anchors有x个,gt_boxes有y个,返回的是一个(x,y)的数组 # 存放每一个anchor和每一个gtbox之间的overlap argmax_overlaps = overlaps.argmax( axis=1) # (A)#找到和每一个anchor,overlap最大的那个gt max_overlaps = overlaps[np.arange( len(inds_inside) ), argmax_overlaps] # 假如在内部的anchor有900个 ,(900,), 表示的是每一个anchor最大的overlaps值 gt_argmax_overlaps = overlaps.argmax( axis=0) # G#找到所有anchor中与gtbox,overlap最大的那个anchor # (3) if DEBUG: print('获取所有anchor中与gt相交最大的哪几个anchor的索引') print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape) print('gt_argmax_overlaps', gt_argmax_overlaps) gt_max_overlaps = overlaps[ gt_argmax_overlaps, np.arange( overlaps.shape[1] )] # 比如有3个gt 那么就得到(3,),表示的是上一步找到的与gt的overlap最大的3个anchor的overlap值 gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[ 0] # (3, ) 表示的是哪几个与gt有最大overlap的anchor的索引 if DEBUG: print('这一步是找到那些同样与gt有最大overlap的索引,上一步找到的4个,这一步找到其他重复的') print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape) print('gt_argmax_overlaps', gt_argmax_overlaps) if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 #先给背景上标签,小于0.3overlap的 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 #每个位置上的9个anchor中overlap最大的认为是前景 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 #overlap大于0.7的认为是前景 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 if DEBUG: print('在过滤数量之前:') print('正样本:', len(np.where(labels == 1)[0])) print('负样本:', len(np.where(labels == 0)[0])) print('忽略样本:', len(np.where(labels == -1)[0])) # preclude dontcare areas if dontcare_areas is not None and dontcare_areas.shape[ 0] > 0: #这里我们暂时不考虑有doncare_area的存在 # intersec shape is D x A intersecs = bbox_intersections( np.ascontiguousarray(dontcare_areas, dtype=np.float), # D x 4 np.ascontiguousarray(anchors, dtype=np.float) # A x 4 ) intersecs_ = intersecs.sum(axis=0) # A x 1 labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1 #这里我们暂时不考虑难样本的问题 # preclude hard samples that are highly occlusioned, truncated or difficult to see if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[ 0] > 0: assert gt_ishard.shape[0] == gt_boxes.shape[0] gt_ishard = gt_ishard.astype(int) gt_hardboxes = gt_boxes[gt_ishard == 1, :] if gt_hardboxes.shape[0] > 0: # H x A hard_overlaps = bbox_overlaps( np.ascontiguousarray(gt_hardboxes, dtype=np.float), # H x 4 np.ascontiguousarray(anchors, dtype=np.float)) # A x 4 hard_max_overlaps = hard_overlaps.max(axis=0) # (A) labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1 max_intersec_label_inds = hard_overlaps.argmax(axis=1) # H x 1 labels[max_intersec_label_inds] = -1 # # subsample positive labels if we have too many #对正样本进行采样,如果正样本的数量太多的话 # 限制正样本的数量不超过128个 #TODO 这个后期可能还需要修改,毕竟如果使用的是字符的片段,那个正样本的数量是很多的。 num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) #随机去除掉一些正样本 labels[disable_inds] = -1 #变为-1 # subsample negative labels if we have too many #对负样本进行采样,如果负样本的数量太多的话 # 正负样本总数是256,限制正样本数目最多128, # 如果正样本数量小于128,差的那些就用负样本补上,凑齐256个样本 num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 #print "was %s inds, disabling %s, now %s inds" % ( #len(bg_inds), len(disable_inds), np.sum(labels == 0)) if DEBUG: print("考虑均衡住正负样本以后:") print('正样本:', len(np.where(labels == 1)[0])) print('负样本:', len(np.where(labels == 0)[0])) print('忽略样本:', len(np.where(labels == -1)[0])) # 至此, 上好标签,开始计算rpn-box的真值 #-------------------------------------------------------------- bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets( anchors, gt_boxes[argmax_overlaps, :]) #根据anchor和gtbox计算得真值(anchor和gtbox之间的偏差) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_inside_weights[labels == 1, :] = np.array( cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) #内部权重,前景就给1,其他是0 bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: #暂时使用uniform 权重,也就是正样本是1,负样本是0 # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) + 1 # positive_weights = np.ones((1, 4)) * 1.0 / num_examples # negative_weights = np.ones((1, 4)) * 1.0 / num_examples positive_weights = np.ones((1, 4)) negative_weights = np.zeros((1, 4)) else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / (np.sum(labels == 1)) + 1) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / (np.sum(labels == 0)) + 1) bbox_outside_weights[labels == 1, :] = positive_weights # 外部权重,前景是1,背景是0 bbox_outside_weights[labels == 0, :] = negative_weights if DEBUG: _sums += bbox_targets[labels == 1, :].sum(axis=0) _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0) _counts += np.sum(labels == 1) means = _sums / _counts stds = np.sqrt(_squared_sums / _counts - means**2) print('means:') print(means) print('stdevs:') print(stds) # map up to original set of anchors # 一开始是将超出图像范围的anchor直接丢掉的,现在在加回来 labels = _unmap(labels, total_anchors, inds_inside, fill=-1) #这些anchor的label是-1,也即dontcare bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) #这些anchor的真值是0,也即没有值 bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) #内部权重以0填充 bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) #外部权重以0填充 if DEBUG: print('rpn: max max_overlap', np.max(max_overlaps)) print('rpn: num_positive', np.sum(labels == 1)) print('rpn: num_negative', np.sum(labels == 0)) _fg_sum += np.sum(labels == 1) _bg_sum += np.sum(labels == 0) _count += 1 print('rpn: num_positive avg', _fg_sum / _count) print('rpn: num_negative avg', _bg_sum / _count) # labels labels = labels.reshape((1, height, width, A)) #reshap一下label rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4))#reshape rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def anchor_target_layer(rpn_cls_score, rpn_cls_prob, im_name, gt_boxes_large, gt_ishard, dontcare_areas, im_info, _feat_stride=[ 16, ], anchor_scales=[ 16, ]): """ 将gt_box划分为细框 实现论文中的side-refinement arameters ---------- rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class] gt_ishard: (G, 1), 1 or 0 indicates difficult or not dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0 im_info: a list of [image_height, image_width, scale_ratios] _feat_stride: the downsampling ratio of feature map to the original input image anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) ---------- :return: """ global img_name if img_name != im_name: # 第一次训练这个图片 flag_first = True else: flag_first = False img_name = im_name if flag_first: # 如果是第一次见到这个图片,就要重新生成所有tmp对象 gt_boxes = split_frame(gt_boxes_large) # gt_width = gt_boxes[:,2]-gt_boxes[:,0] _anchors = generate_anchors( scales=np.array(anchor_scales)) # 生成基本的anchor,一共9个 _num_anchors = _anchors.shape[0] # 9个anchor if DEBUG: print('anchors:') print(_anchors) print('anchor shapes:') print( np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) _fg_sum = 0 _bg_sum = 0 _count = 0 # allow boxes to sit over the edge by a small amount _allowed_border = 0 im_info = im_info[0] # 图像的高宽及通道数 assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3] # feature-map的高宽 if DEBUG: print('AnchorTargetLayer: height', height, 'width', width) print('') print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) print('height, width: ({}, {})'.format(height, width)) print('rpn: gt_boxes.shape', gt_boxes.shape) # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride # (W) shift_y = np.arange(0, height) * _feat_stride # (H) shift_x, shift_y = np.meshgrid( shift_x, shift_y) # in W H order # shift_x (H, W) shift_y (H, W) # K is H x W shifts = np.vstack( (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel() )).transpose() # 生成feature-map和真实image上anchor之间的偏移量 #(H*W, 4) # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors # 9个anchor K = shifts.shape[0] # 50*37,feature-map的宽乘高的大小 all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) # 相当于复制宽高的维度,然后相加 all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image # 仅保留那些还在图像内部的anchor,超出图像的都删掉 inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] # 保留那些在图像内的anchor (In, 4) if DEBUG: print('anchors.shape', anchors.shape) # 至此,anchor准备好了 # -------------------------------------------------------------- # label: 1 is positive, 0 is negative, -1 is dont care # (A) labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # 初始化label,均为-1 # overlaps between the anchors and the gt boxes # overlaps (ex, gt), shape is A x G # 计算anchor和gt-box的overlap,用来给anchor上标签 overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray( gt_boxes, dtype=np.float)) # 假设anchors有x个,gt_boxes有y个,返回的是一个(x,y)的数组 # 存放每一个anchor和每一个gtbox之间的overlap argmax_overlaps = overlaps.argmax( axis=1) # (A)#找到和每一个anchor,overlap最大的那个gt max_overlaps = overlaps[np.arange( len(inds_inside) ), argmax_overlaps] # 假如在内部的anchor有900个 ,(900,), 表示的是每一个anchor最大的overlaps值 gt_argmax_overlaps = overlaps.argmax( axis=0) # G#找到所有anchor中与gtbox,overlap最大的那个anchor # (3) gt_max_overlaps = overlaps[ gt_argmax_overlaps, np.arange( overlaps.shape[1] )] # 比如有3个gt 那么就得到(3,),表示的是上一步找到的与gt的overlap最大的3个anchor的overlap值 gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[ 0] # (3, ) 表示的是哪几个与gt有最大overlap的anchor的索引 if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # 先给背景上标签,小于0.3overlap的 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # 每个位置上的9个anchor中overlap最大的认为是前景 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 # overlap大于0.7的认为是前景 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 before_filter_labels = labels.copy() # 过滤之前的标签,方便用来计算hard negtive all_bg_index = before_filter_labels == 0 if DEBUG: print('在过滤数量之前:') print('正样本:' + str(len(np.where(labels == 1)[0]))) print('负样本:' + str(len(np.where(labels == 0)[0]))) print('忽略样本:' + str(len(np.where(labels == -1)[0]))) # preclude dontcare areas if dontcare_areas is not None and dontcare_areas.shape[ 0] > 0: # 这里我们暂时不考虑有doncare_area的存在 # intersec shape is D x A intersecs = bbox_intersections( np.ascontiguousarray(dontcare_areas, dtype=np.float), # D x 4 np.ascontiguousarray(anchors, dtype=np.float) # A x 4 ) intersecs_ = intersecs.sum(axis=0) # A x 1 labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1 # 这里我们暂时不考虑难样本的问题 # preclude hard samples that are highly occlusioned, truncated or difficult to see if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[ 0] > 0 and 0: assert gt_ishard.shape[0] == gt_boxes.shape[0] gt_ishard = gt_ishard.astype(int) gt_hardboxes = gt_boxes[gt_ishard == 1, :] if gt_hardboxes.shape[0] > 0: # H x A hard_overlaps = bbox_overlaps( np.ascontiguousarray(gt_hardboxes, dtype=np.float), # H x 4 np.ascontiguousarray(anchors, dtype=np.float)) # A x 4 hard_max_overlaps = hard_overlaps.max(axis=0) # (A) labels[ hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1 max_intersec_label_inds = hard_overlaps.argmax(axis=1) # H x 1 labels[max_intersec_label_inds] = -1 # # subsample positive labels if we have too many # 对正样本进行采样,如果正样本的数量太多的话 # 限制正样本的数量不超过128个 # TODO 这个后期可能还需要修改,毕竟如果使用的是字符的片段,那个正样本的数量是很多的。 num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) # 随机去除掉一些正样本 labels[disable_inds] = -1 # 变为-1 # subsample negative labels if we have too many # 对负样本进行采样,如果负样本的数量太多的话 # 正负样本总数是512,限制正样本数目最多128, # 如果正样本数量小于128,差的那些就用负样本补上,凑齐256个样本 num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 # print "was %s inds, disabling %s, now %s inds" % ( # len(bg_inds), len(disable_inds), np.sum(labels == 0)) if DEBUG: print("考虑均衡住正负样本以后:") print('正样本:' + str(len(np.where(labels == 1)[0]))) print('负样本:' + str(len(np.where(labels == 0)[0]))) print('忽略样本:' + str(len(np.where(labels == -1)[0]))) # 至此,第一次生成好了这个图片的labels,随机抽了512个 # 生成其他部分的标签 v_target, o_target = _compute_targets(anchors, gt_boxes[ argmax_overlaps, :]) # 根据anchor和gtbox计算得真值(anchor和gtbox之间的偏差) # 但是计算损失函数的时候,其实是需要j索引和k索引,所以计算好这两个索引,一并返回,帮助计算损失函数 # j索引,有效索引:正锚点或者与gt的overlap大于0.5以上的锚点的索引 # 正锚点 positive_index = np.where(labels == 1)[0] # 应该是一个(p,)p应该不大于128 # # ignore_index = np.where(labels==-1)[0] # 应该是一个(n,)n应该很大,因为忽略的anchor很多 keep_index = np.where(labels != -1)[0] _ = np.where( max_overlaps > 0.5)[0] # 应该是一个(c,),表示overlap大于0.5的anchor的索引 remove_ignore = list() for i in range(_.shape[0]): if i in keep_index: remove_ignore.append(_[i]) remove_ignore = np.array(remove_ignore) effect_index = np.append(positive_index, remove_ignore) remove_repeat = np.array(list(set(list(effect_index)))) j_index = remove_repeat.astype(np.int32) j_index1 = np.zeros((len(inds_inside)), dtype=np.int32) j_index1[j_index] = 1 # k 索引 , 边缘索引 # 先找到所有的可以认为是边缘的gt框,这里简单的认为是边缘框和左右各自一个。 # ori_gt_box = (gt_boxes/im_info[2]).astype(np.int32, copy=False) ori_gt_box = gt_boxes.astype(np.float32, copy=False) # 找到左右边界框,矩阵操作实现 todo list_left_index = list() list_right_index = list() for i in range(ori_gt_box.shape[0]): if ori_gt_box[i][2] - ori_gt_box[i][0] != 15: list_left_index.append(i) else: continue list_index1 = list_left_index + list_right_index # 去除不属于gt中的索引和重复的索引 list_index2 = list(set(list_index1)) list_index3 = sorted(list_index2) list_index4 = list() for index in list_index3: if index in range(ori_gt_box.shape[0]): list_index4.append(index) gt_side_index = np.array(list_index4).astype(np.int32) # 得到了边界gt框的索引 # 要得到与这些gt框有最大的overlap的anchors的索引,这些anchor是我们关心的 gt_argmax_overlaps = overlaps.argmax(axis=0) anchor_side_index = gt_argmax_overlaps[ gt_side_index] # 得到143个与gt具有最大的overlaps的anchor的索引 # 还要去掉与边界框overlap为0的anchor,因为这些anhcor不是真的我们关心的anchor,如果不去除,还会造成o_loss异常大 anchor_side_list = list() for i in range(anchor_side_index.shape[0]): anchor_index = anchor_side_index[i] gt_index = gt_side_index[i] overlap = overlaps[anchor_index, gt_index] if overlap > 0: anchor_side_list.append(anchor_index) anchor_side_index = np.array(anchor_side_list, dtype=np.int32) anchor_side_index1 = np.array( sorted(list(set(list(anchor_side_index))))).astype(np.int32) k_index = anchor_side_index1 # (s,) s个边界索引,但是并不是包括之前去除的超过边界框的索引值,所以需要之后的操作 k_index1 = np.zeros((len(inds_inside)), dtype=np.int32) k_index1[k_index] = 1 in_labels = labels.copy() # map up to original set of anchors # 一开始是将超出图像范围的anchor直接丢掉的,现在在加回来 labels = _unmap(labels, total_anchors, inds_inside, fill=-1) # 这些anchor的label是-1,也即dontcare v_target = _unmap(v_target, total_anchors, inds_inside, fill=0) # 这些anchor的真值是0,也即没有值 o_target = _unmap(o_target, total_anchors, inds_inside, fill=0) j_index2 = _unmap(j_index1, total_anchors, inds_inside, fill=0).astype(np.int32) k_index2 = _unmap(k_index1, total_anchors, inds_inside, fill=0).astype(np.int32) # real_j_index = np.where(j_index2==1)[0] # real_k_index = np.where(k_index2==1)[0] global tmp_labels, tmp_all_bg_index, tmp_v_target, tmp_o_target, tmp_j_index2, tmp_k_index2, tmp_inds_inside tmp_labels = in_labels tmp_all_bg_index = all_bg_index tmp_v_target = v_target tmp_o_target = o_target tmp_j_index2 = j_index2 tmp_k_index2 = k_index2 tmp_inds_inside = inds_inside if DEBUG or SHOW_SOME: print('第一次这张图') print('正样本:' + str(len(np.where(labels == 1)[0]))) print('负样本:' + str(len(np.where(labels == 0)[0]))) print('忽略样本:' + str(len(np.where(labels == -1)[0]))) # print('保存的tmp_labels') # print('正样本:' + str(len(np.where(tmp_labels == 1)[0]))) # print('负样本:' + str(len(np.where(tmp_labels == 0)[0]))) # print('忽略样本:' + str(len(np.where(tmp_labels == -1)[0]))) return labels, v_target, o_target, j_index2, k_index2 else: # 第二次见过这个图,只用生成hard neg添加进去 if DEBUG and SHOW_SOME: print('不是第一次') # 先找出负样本 bg_index = tmp_all_bg_index inds_inside = tmp_inds_inside # 找出得分高于某个阈值的 rpn_cls_prob = np.reshape(rpn_cls_prob, [-1, 2]) rpn_cls_prob = rpn_cls_prob[inds_inside, :] fg_score = rpn_cls_prob[:, 1] high_score = fg_score > 0.5 # 找出即是负样本,又是分数很高的样本 assert bg_index.shape == high_score.shape # 得到了hard negtive的索引,这个是 hard_neg = bg_index * high_score if DEBUG: print('负样本的数量:' + str(len(np.where(bg_index == True)[0]))) print('得分高于0.5的数量:' + str(len(np.where(high_score == True)[0]))) print('hard negtive 数量' + str(len(np.where(hard_neg == True)[0]))) # 如果是第二次训练这张图片,训练样本是第一次随机生成的正负样本加这一次的hard negtive labels = tmp_labels.copy() first_gen_index = labels != -1 hard_neg_index = hard_neg assert first_gen_index.shape == hard_neg_index.shape, 'line 282' diff = hard_neg_index * 1 - first_gen_index * 1 new_hard_index = diff == 1 assert labels.shape == new_hard_index.shape if DEBUG: print('加载进来的第一次的labels的负样本的数量:' + str(len(np.where(labels == 0)[0]))) print('属于难负样本不属于第一次样本的数量:' + str(len(np.where(new_hard_index == True)[0]))) print('加难样本之前的负样本数量:' + str(len(np.where(labels == 0)[0]))) labels[new_hard_index] = 0 if DEBUG or SHOW_SOME: print('加难样本之后的负样本数量:' + str(len(np.where(labels == 0)[0]))) print('正样本:' + str(len(np.where(labels == 1)[0]))) print('负样本:' + str(len(np.where(labels == 0)[0]))) print('忽略样本:' + str(len(np.where(labels == -1)[0]))) # print('第一次保存的tmp_labels的负样本数量应该不变的'+ str(len(np.where(tmp_labels == 0)[0]))) # 至此,准备好了labels # 其他标签不变,加载上次保存的就好 v_target = tmp_v_target o_target = tmp_o_target j_index2 = tmp_j_index2 k_index2 = tmp_k_index2 return labels, v_target, o_target, j_index2, k_index2