def cls_target2(img_shape, all_anchors, bboxes, gt_class_ids):
    """Build RPN labels and box-regression targets for the anchors of one image.

    Every anchor gets a label: 1 (positive), 0 (negative) or -1 (ignored).
    Positive anchors also get regression deltas towards the ground-truth box
    they overlap most.

    :param img_shape: image shape; index 0 is compared against anchor y2 and
        index 1 against anchor x2 when testing whether an anchor is inside
        the image (with ``allowed_border`` pixels of slack).
    :param all_anchors: array of shape [num_anchors, 4] with
        (x1, y1, x2, y2) anchor coordinates.
    :param bboxes: array of ground-truth boxes (x1, y1, x2, y2), indexed in
        parallel with ``gt_class_ids``.
    :param gt_class_ids: array of class ids.  Negative ids mark "crowd" boxes.
        When at least one crowd box is present only boxes with id > 0 are
        used as ground truth; otherwise every row of ``bboxes`` is used.
    :return: tuple ``(rpn_labels, anchor_deltas)``; ``rpn_labels`` is an int32
        array of shape [num_anchors] and ``anchor_deltas`` a float32 array of
        shape [num_anchors, 4] holding
        (dx, dy, log(gt_h / anchor_h), log(gt_w / anchor_w)) divided by
        ``RPN_BBOX_STD_DEV`` for positive anchors and zeros elsewhere.
    :raises ValueError: if a positive anchor or its matched ground-truth box
        has a non-positive width/height, or if a computed delta is NaN.
    """
    # Anchors that lie inside the image, tolerating `allowed_border` pixels.
    inside = ((all_anchors[:, 0] >= -allowed_border) &
              (all_anchors[:, 1] >= -allowed_border) &
              (all_anchors[:, 2] < img_shape[1] + allowed_border) &
              (all_anchors[:, 3] < img_shape[0] + allowed_border))

    num_anchors = all_anchors.shape[0]
    rpn_labels = np.full((num_anchors,), -1, dtype=np.int32)
    anchor_deltas = np.zeros((num_anchors, 4), dtype=np.float32)

    # Some boxes enclose several instances ("crowd"); they carry a negative id.
    crowd_ix = np.where(gt_class_ids < 0)[0]
    if crowd_ix.shape[0] > 0:
        non_crowd_ix = np.where(gt_class_ids > 0)[0]
        crowd_boxes = bboxes[crowd_ix]
        gt_boxes = bboxes[non_crowd_ix]
        # Anchors that overlap a crowd box must not be trained as negatives.
        crowd_overlaps = bbox_overlaps(
            np.ascontiguousarray(all_anchors, dtype=np.float64),
            np.ascontiguousarray(crowd_boxes, dtype=np.float64))
        crowd_iou_max = np.amax(crowd_overlaps, axis=1)  # one value per anchor
        no_crowd_bool = (crowd_iou_max < 0.001)
    else:
        no_crowd_bool = np.ones(shape=(num_anchors,), dtype=bool)
        gt_boxes = bboxes

    if gt_boxes.shape[0] == 0:
        # No usable ground truth: mark a random set of anchors as negatives.
        # The draw is capped so that it cannot exceed the number of anchors.
        num_random_neg = min(num_anchors, RPN_TRAIN_ANCHORS_PER_IMAGE)
        rpn_labels[np.random.choice(num_anchors, num_random_neg, replace=False)] = 0
        return rpn_labels, anchor_deltas

    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)  # best ground truth per anchor
    max_overlaps = overlaps[np.arange(num_anchors), argmax_overlaps]

    # Negatives: low IoU, inside the image and not touching a crowd box.
    rpn_labels[(max_overlaps < neg_anchor_thresh) & no_crowd_bool & inside] = 0
    # Positives: high IoU and inside the image.
    rpn_labels[(max_overlaps >= posi_anchor_thresh) & inside] = 1
    # Every ground-truth box keeps its best anchor as a positive, even when
    # that anchor's IoU is below the positive threshold.
    gt_iou_argmax = np.argmax(overlaps, axis=0)
    rpn_labels[gt_iou_argmax] = 1

    # Positives may fill at most half of the training batch.
    pos_ids = np.where(rpn_labels == 1)[0]
    extra = len(pos_ids) - RPN_TRAIN_ANCHORS_PER_IMAGE // 2
    if extra > 0:
        rpn_labels[np.random.choice(pos_ids, extra, replace=False)] = -1
        pos_ids = np.where(rpn_labels == 1)[0]

    min_size = 0.00001
    for i, a in zip(pos_ids, all_anchors[pos_ids]):
        gt = gt_boxes[argmax_overlaps[i]]
        gt_h = gt[3] - gt[1]
        gt_w = gt[2] - gt[0]
        gt_ctr_x = gt[0] + 0.5 * gt_w
        gt_ctr_y = gt[1] + 0.5 * gt_h
        an_h = a[3] - a[1]
        an_w = a[2] - a[0]
        an_ctr_x = a[0] + 0.5 * an_w
        an_ctr_y = a[1] + 0.5 * an_h
        if gt_h <= min_size or an_h <= min_size or gt_w <= min_size or an_w <= min_size:
            # Degenerate boxes would produce a division by zero / log of a
            # non-positive number; fail loudly instead of exiting quietly.
            raise ValueError(
                "invalid anchor or gt: gt_h={}, an_h={}, gt_w={}, an_w={}".format(
                    gt_h, an_h, gt_w, an_w))
        anchor_deltas[i] = [(gt_ctr_x - an_ctr_x) / an_w,
                            (gt_ctr_y - an_ctr_y) / an_h,
                            np.log(gt_h / an_h),
                            np.log(gt_w / an_w)]
        anchor_deltas[i] /= RPN_BBOX_STD_DEV
        if np.isnan(anchor_deltas[i]).any():
            raise ValueError(
                "NaN regression delta for anchor {}: {}".format(i, anchor_deltas[i]))

    # Fill the rest of the batch with negatives and ignore the surplus.
    neg_ids = np.where(rpn_labels == 0)[0]
    extra = len(neg_ids) - (RPN_TRAIN_ANCHORS_PER_IMAGE - len(pos_ids))
    if extra > 0:
        rpn_labels[np.random.choice(neg_ids, extra, replace=False)] = -1

    return rpn_labels, anchor_deltas
def _sample_distinct(pool, count):
    """Return up to ``count`` distinct entries drawn at random from ``pool``."""
    count = min(int(count), len(pool))
    if count <= 0:
        # Nothing to draw (empty pool or zero quota): return an empty slice.
        return pool[:0]
    return npr.choice(pool, count, replace=False)


def cls_target(img_shape, bboxes):
    """Generate anchors, labels and box targets for every feature stride.

    For each entry of ``feat_strides`` a dense grid of anchors is laid over
    the feature map, matched against ``bboxes`` by IoU, and a subset of the
    anchors is labelled for training.

    :param img_shape: image shape; index 0 is the height and index 1 the
        width used to derive the feature-map size (``// feat_stride``).
    :param bboxes: array of ground-truth boxes with 4 columns; passed to
        ``bbox_overlaps`` together with the (xmin, ymin, xmax, ymax) anchors.
    :return: tuple ``(labels, targets, anchors)`` of lists with one entry per
        feature stride.  ``labels[k]`` is an int8 array of shape [N] with
        1 (positive), 0 (negative) or -1 (ignored); ``targets[k]`` is a
        float32 array of shape [N, 4] holding the matched ground-truth box
        for positive anchors and zeros elsewhere; ``anchors[k]`` is the
        [N, 4] anchor array.  N = fm_h * fm_w * num_anchors.
    """
    scales = np.array(anchor_scales).reshape((-1, 1))
    ratios = np.array(anchor_ratios)
    # Each scale * ratio product is used as the side length of one anchor.
    all_scales = (scales * ratios).reshape(-1)
    num_anchors = len(all_scales)

    labels = []
    targets = []
    anchors = []
    for feat_stride in feat_strides:
        # Anchors of a single cell, centred on the cell.
        per_cell_anchor = np.zeros([num_anchors, 4], dtype=np.float32)
        per_cell_anchor[:, 0] = (feat_stride - 1) / 2 - all_scales / 2  # xmin
        per_cell_anchor[:, 2] = (feat_stride - 1) / 2 + all_scales / 2  # xmax
        per_cell_anchor[:, 1] = (feat_stride - 1) / 2 - all_scales / 2  # ymin
        per_cell_anchor[:, 3] = (feat_stride - 1) / 2 + all_scales / 2  # ymax

        fm_h = img_shape[0] // feat_stride
        fm_w = img_shape[1] // feat_stride
        num_total = fm_h * fm_w * num_anchors

        # Every feature-map cell owns num_anchors anchors; each anchor has a
        # label and a regression target.
        label = np.full((num_total,), -1, dtype=np.int8)
        target = np.zeros((num_total, 4), dtype=np.float32)

        # Shift the per-cell anchors to every cell of the feature map.
        shift_x = np.arange(0, fm_w) * feat_stride
        shift_y = np.arange(0, fm_h) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        all_anchors = (per_cell_anchor.reshape((1, num_anchors, 4)) +
                       shifts.reshape((1, fm_h * fm_w, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((num_total, 4))

        if len(bboxes) == 0:
            # Without ground truth nothing can be matched: every anchor stays
            # ignored (-1) and every target stays zero.
            labels.append(label)
            targets.append(target)
            anchors.append(all_anchors)
            continue

        overlaps = bbox_overlaps(
            np.ascontiguousarray(all_anchors, dtype=np.float64),
            np.ascontiguousarray(bboxes, dtype=np.float64))
        # Index of the best-overlapping ground-truth box for every anchor.
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(all_anchors.shape[0]), argmax_overlaps]

        all_pos_idx = np.where(max_overlaps > posi_anchor_thresh)[0]
        # Negatives must overlap some box, but only weakly.
        all_neg_idx = np.where((max_overlaps < neg_anchor_thresh) &
                               (max_overlaps > 0))[0]

        if batch_anchor_num * positive_ratio > len(all_pos_idx):
            # Fewer positives than the quota: keep all of them.
            posi_idx = all_pos_idx
        else:
            posi_idx = _sample_distinct(all_pos_idx,
                                        batch_anchor_num * positive_ratio)

        # Keep the positive/negative ratio of the batch.
        neg_idx = _sample_distinct(
            all_neg_idx,
            len(posi_idx) * (1 - positive_ratio) / positive_ratio)
        # TODO: some feature maps have no positive anchors.
        if len(neg_idx) == 0:
            # No negatives were drawn (e.g. no positives on this level):
            # fall back to the plain negative quota of the batch.
            neg_idx = _sample_distinct(
                all_neg_idx, batch_anchor_num * (1 - positive_ratio))

        label[posi_idx] = 1
        label[neg_idx] = 0

        # posi_idx indexes all_anchors; argmax_overlaps maps each of those
        # anchors to the ground-truth box it overlaps most.
        target[posi_idx] = bboxes[argmax_overlaps[posi_idx]]

        labels.append(label)
        targets.append(target)
        anchors.append(all_anchors)
    return labels, targets, anchors
def corner_py(corner_pred_score, corner_pred_offset, gt_default_box, scales, feat_stride, img_info):
    """Assign labels and regression targets to the corner default boxes.

    A default box (x, y, scale, scale) is created for every feature-map cell
    and every scale.  Boxes inside the image are matched by IoU against the
    ground-truth boxes of each corner kind, labelled, and the labels are
    then subsampled to the configured batch size.

    :param corner_pred_score: array whose shape starts with
        (1, height, width); only its shape is read.
    :param corner_pred_offset: unused; kept for interface compatibility.
    :param gt_default_box: ground-truth corner boxes, one group per corner
        kind, documented by the original author as shape
        (4, num_gt, 4) in the order 0 left-top, 1 right-top,
        2 right-bottom, 3 left-bottom.
    :param scales: sequence of default-box scales.
    :param feat_stride: stride between feature-map cells, in image pixels.
    :param img_info: image size; index 0 bounds the y extent and index 1
        bounds the x extent of the default boxes.
    :return: tuple ``(labels, box_target)``.  ``labels`` has shape
        (height, width, num_scales, 4, 1) with 1 (positive), 0 (negative)
        or -1 (ignored); ``box_target`` has shape
        (height, width, num_scales, 4, 4) and holds the matched ground-truth
        box for positives (truncated to integers) and zeros elsewhere.
    :raises AssertionError: if the batch size is not 1, or if no positive or
        no negative default box exists.
    """
    # TODO: the input tensors still need to be converted.
    assert corner_pred_score.shape[0] == 1, \
        'Only single item batches are supported'

    num_scales = len(scales)
    # Default boxes of one cell: (x, y, scale, scale) relative to the cell.
    per_cell_db = np.array([[0 for _ in scales],
                            [0 for _ in scales],
                            scales,
                            scales], np.int32).transpose()

    height, width = corner_pred_score.shape[1:3]
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # Flatten the grids: one entry per feature-map cell.
    shift_x = shift_x.ravel()
    shift_y = shift_y.ravel()
    all_pixel_num = shift_x.shape[0]  # number of cells in the feature map
    padding_zeros = np.zeros(all_pixel_num, np.int32)
    shifts = np.vstack((shift_x, shift_y,
                        padding_zeros, padding_zeros)).transpose()

    # num_scales default boxes for every cell: (H*W, num_scales, 4) ...
    all_default_box = (per_cell_db.reshape((1, num_scales, 4)) +
                       shifts.reshape((1, all_pixel_num, 4)).transpose((1, 0, 2)))
    # ... flattened to (H*W*num_scales, 4).
    num_total = all_pixel_num * num_scales
    all_default_box = all_default_box.reshape(num_total, 4)

    # Keep only the default boxes that lie completely inside the image.
    # Each comparison is parenthesised because `&` binds tighter than `>=`.
    idx_inside = np.where(
        (all_default_box[:, 0] - all_default_box[:, 2] / 2 >= 0) &
        (all_default_box[:, 0] + all_default_box[:, 2] / 2 <= img_info[1]) &
        (all_default_box[:, 1] - all_default_box[:, 3] / 2 >= 0) &
        (all_default_box[:, 1] + all_default_box[:, 3] / 2 <= img_info[0])
    )[0]
    # NOTE(review): these boxes are (x, y, scale, scale) and are passed
    # unchanged to bbox_overlaps - confirm that helper and gt_default_box use
    # the same representation.
    default_boxes = all_default_box[idx_inside, :]
    num_valid = len(idx_inside)  # number of default boxes inside the image

    # labels: (H, W, num_scales, q=4, 1); box_target: (H, W, num_scales, q, 4).
    labels = np.empty((height, width, num_scales, 4, 1))
    labels.fill(-1)
    box_target = np.zeros((height, width, num_scales, 4, 4))

    # Match the default boxes against the ground truth of each corner kind.
    for ix, gt_corner_box in enumerate(gt_default_box):
        # overlaps has shape (num_valid, number of gt boxes of this kind).
        overlaps = bbox_overlaps(
            np.ascontiguousarray(default_boxes, dtype=np.float64),
            np.ascontiguousarray(gt_corner_box, dtype=np.float64))
        # Best-matching ground-truth box for every valid default box.
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(num_valid), argmax_overlaps]

        valid_label = np.full((num_valid,), -1, np.int8)
        # Low IoU -> negative, high IoU -> positive, in between stays ignored.
        valid_label[max_overlaps < cfg.TRAIN.NEGATIVE_OVERLAP] = 0
        valid_label[max_overlaps >= cfg.TRAIN.POSITIVE_OVERLAP] = 1

        # Scatter the labels of the valid boxes back onto the full grid.
        per_kind_corner_label = np.full((num_total,), -1, np.int8)
        per_kind_corner_label[idx_inside] = valid_label
        labels[:, :, :, ix, :] = per_kind_corner_label.reshape(
            height, width, num_scales, 1)

        # Regression target: every positive default box gets its matched
        # ground-truth box.  positive_inds indexes the valid boxes, so it is
        # mapped through idx_inside to address the full grid.
        positive_inds = np.where(valid_label == 1)[0]
        per_kind_corner_target = np.zeros((num_total, 4), np.int32)
        per_kind_corner_target[idx_inside[positive_inds], :] = \
            gt_corner_box[argmax_overlaps[positive_inds]]
        box_target[:, :, :, ix, :] = per_kind_corner_target.reshape(
            height, width, num_scales, 4)

    # Subsample positives if there are too many of them.
    num_fg = int(cfg.TRAIN.POSITIVE_RATIO * cfg.TRAIN.DEFAULT_BOX_NUM)
    # Reshaping this freshly allocated array yields a view, so edits to
    # flat_label are reflected in labels.
    flat_label = labels.reshape((num_total * 4,))
    fg_inds = np.where(flat_label == 1)[0]
    assert len(fg_inds) > 0, "The number of positive proposals must be lager than zero"
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        flat_label[disable_inds] = -1  # surplus positives are ignored

    # Subsample negatives so positives plus negatives fill the batch.
    num_bg = cfg.TRAIN.DEFAULT_BOX_NUM - np.sum(flat_label == 1)
    bg_inds = np.where(flat_label == 0)[0]
    assert len(bg_inds) > 0, "The number of negtive proposals must be lager than zero"
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        flat_label[disable_inds] = -1

    return labels, box_target