def _process_batch(data, size_index):
    """Build YOLO-style training targets and loss weights for one image.

    Every (cell, anchor) slot starts with default targets.  Slots whose best
    IoU against any ground-truth box is at or below ``cfg.iou_thresh`` get a
    no-object IoU weight.  For each ground-truth box, the slot made of the
    cell containing the box centre and the anchor with the highest
    ``anchor_intersections`` score receives box, IoU and class targets.

    Args:
        data: Sequence ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``.  ``bbox_pred_np`` has shape
            ``(hw, num_anchors, K)``.  ``gt_boxes`` rows are read as
            ``(x1, y1, x2, y2)`` (assumed to be in input-image pixels and
            non-empty -- TODO confirm against the caller).  ``gt_classes[i]``
            is used as the class index of box ``i``.  ``dontcares`` is
            unpacked but unused.  ``iou_pred_np`` holds the predicted IoU
            scores and is indexed like the ``(hw, num_anchors, 1)`` masks.
        size_index: Index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size``.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays with shapes ``(hw, num_anchors, 4)``,
        ``(hw, num_anchors, 1)``, ``(hw, num_anchors, cfg.num_classes)`` and
        three times ``(hw, num_anchors, 1)``.
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and loss weights, one entry per (cell, anchor).
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    # Unmatched slots keep a small, non-zero box weight.
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=np.float64)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors, H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=np.float64),
        np.ascontiguousarray(gt_boxes_b, dtype=np.float64))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)

    # Build the no-object mask once.  The original gathered the penalty with
    # `<` but assigned with `<=`, so a best IoU exactly equal to the
    # threshold made the two selections differ in size and the assignment
    # could fail with a shape mismatch.
    noobj = best_ious <= cfg.iou_thresh
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg.noobject_scale * iou_penalty

    # locate the cell of each gt_box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)  # np.int alias was removed as well

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    target_boxes[:, 0] = cx - np.floor(cx)  # centre x offset inside the cell
    target_boxes[:, 1] = cy - np.floor(cy)  # centre y offset inside the cell
    # Box width / height expressed in output-grid cells.
    target_boxes[:, 2] = (
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0])
    target_boxes[:, 3] = (
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1])

    # for each gt box, match the best anchor
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=np.float64))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # Box centre falls outside the grid: report it and skip the box.
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        # 0 ~ 1, should be close to 1
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = (
            cfg.object_scale * (1 - iou_pred_cell_anchor))
        # IoU between this slot's predicted box and ground-truth box i.
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # Express width / height relative to the matched anchor.
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # Width / height targets are left as ratios; no log transform is applied.
    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask
def _process_batch(data, size_index):
    """Build YOLO-style training targets and loss weights for one image.

    Original author's note: "multi-scale output" means the final prediction
    grid is the input size divided by the network stride (usually 32 here --
    not verifiable from this function alone).

    Steps, as implemented below:

    1. Decode the predicted boxes with ``yolo_to_bbox``, rescale them by the
       input size, and compute their IoU with every ground-truth box.  Slots
       whose best IoU is below ``cfg.iou_thresh`` get the no-object weight
       ``cfg.noobject_scale * (0 - iou_pred)``.
    2. For every ground-truth box, compute the centre offset inside its cell,
       its width / height in grid cells, and the flat index of the cell that
       holds its centre.
    3. Pick, for every ground-truth box, the anchor with the highest
       ``anchor_intersections`` score.
    4. Write box, IoU and class targets plus their loss weights into the
       chosen (cell, anchor) slot.

    Args:
        data: Sequence ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``.  ``bbox_pred_np`` has shape
            ``(hw, num_anchors, K)``.  ``gt_boxes`` rows are read as
            ``(x1, y1, x2, y2)`` (assumed to be in input-image pixels and
            non-empty -- TODO confirm against the caller).  ``gt_classes[i]``
            is used as the class index of box ``i``.  ``dontcares`` is
            unpacked but unused.  ``iou_pred_np`` holds the predicted IoU
            scores and is indexed like the ``(hw, num_anchors, 1)`` masks.
        size_index: Index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size``.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays with shapes ``(hw, num_anchors, 4)``,
        ``(hw, num_anchors, 1)``, ``(hw, num_anchors, cfg.num_classes)`` and
        three times ``(hw, num_anchors, 1)``.  An all-zero row in
        ``_classes`` means no object was assigned to that slot.
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and loss weights, one entry per (cell, anchor).  The masks are
    # the per-slot coefficients of the loss terms.
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=np.float64)
    # Add a leading batch axis: (1, w*h, num_anchors, 4).
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    # Presumably the YOLOv2 decoding (TODO confirm in yolo_to_bbox):
    #   bx = sigmoid(tx) + cx,  by = sigmoid(ty) + cy
    #   bw = pw * exp(tw),      bh = ph * exp(th)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors, H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64)

    # for each cell, compare predicted_bbox and gt_bbox
    # (w*h, anchors, 4) -> (w*h*anchors, 4)
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    # Overlap between every predicted box and every ground-truth box; the
    # result is later reshaped to (hw, num_anchors, number of gt boxes).
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=np.float64),
        np.ascontiguousarray(gt_boxes_b, dtype=np.float64))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # _iou_mask stores the no-object penalty for poorly overlapping slots.
    noobj = best_ious < cfg.iou_thresh
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg.noobject_scale * iou_penalty

    # locate the cell of each gt_box
    # Size of one output-grid cell measured in input pixels.
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    # Ground-truth box centres expressed in grid-cell units.
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    # Key step: flat index (row * W + column) of the cell containing each
    # box centre, used to address the first axis of the hw-shaped arrays.
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)  # np.int alias was removed as well

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    # Original author's note: these are the in-cell offsets (the tx / ty
    # targets), not the absolute centre cx / cy.
    target_boxes[:, 0] = cx - np.floor(cx)
    target_boxes[:, 1] = cy - np.floor(cy)
    # Box width / height in output-grid cells (bw / bh rather than tw / th).
    target_boxes[:, 2] = (
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0])
    target_boxes[:, 3] = (
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1])

    # for each gt box, match the best anchor
    # Scale the boxes to grid units, score them against the anchors, and
    # record which anchor is responsible for which box.
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=np.float64))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    # len(cell_inds) is the number of ground-truth objects, so the result is
    # (h*w, num_anchors, objects); the first axis is addressed by cell_inds.
    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # Box centre falls outside the grid: report it and skip the box.
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        # Anchor responsible for this object.
        a = anchor_inds[i]

        # Predicted IoU for this slot: 0 ~ 1, should be close to 1.
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = (
            cfg.object_scale * (1 - iou_pred_cell_anchor))
        # IoU between this slot's predicted box and ground-truth box i.
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # Why divide: bw = pw * exp(tw)  =>  bw / pw = exp(tw).  After this
        # step the stored target is (x offset, y offset, exp(tw), exp(th)).
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # Width / height targets are left as ratios; no log transform is applied.
    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask
def _process_batch(data):
    """Build YOLO-style training targets and loss weights for one image.

    Single-scale variant: grid and input sizes come from ``cfg.out_size`` and
    ``cfg.inp_size``.  Loss weights are constants taken from ``cfg``; the IoU
    target of a matched slot is the ``anchor_intersections`` score, and the
    width / height targets are returned in log space.

    Args:
        data: Sequence ``(bbox_pred_np, gt_boxes, gt_classes, dontcares)``.
            ``bbox_pred_np`` has shape ``(hw, num_anchors, K)``.
            ``gt_boxes`` rows are read as ``(x1, y1, x2, y2)`` (assumed to be
            in input-image pixels and non-empty -- TODO confirm against the
            caller).  ``gt_classes[i]`` is used as the class index of box
            ``i``.  ``dontcares`` is unpacked but unused.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays with shapes ``(hw, num_anchors, 4)``,
        ``(hw, num_anchors, 1)``, ``(hw, num_anchors, cfg.num_classes)`` and
        three times ``(hw, num_anchors, 1)``.
    """
    W, H = cfg.out_size
    inp_size = cfg.inp_size
    out_size = cfg.out_size

    bbox_pred_np, gt_boxes, gt_classes, dontcares = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and loss weights, one entry per (cell, anchor).
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    # Unmatched slots keep a small, non-zero box weight.
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=np.float64)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors, H, W)
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64)

    # for each cell
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=np.float64),
        np.ascontiguousarray(gt_boxes_b, dtype=np.float64))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # Slots that overlap no ground-truth box well get the no-object weight.
    _iou_mask[best_ious <= cfg.iou_thresh] = cfg.noobject_scale

    # locate the cell of each gt_box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)  # np.int alias was removed as well

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    target_boxes[:, 0] = cx - np.floor(cx)  # centre x offset inside the cell
    target_boxes[:, 1] = cy - np.floor(cy)  # centre y offset inside the cell
    # Box width / height expressed in output-grid cells.
    target_boxes[:, 2] = (
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0])
    target_boxes[:, 3] = (
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1])

    # for each gt box, match the best anchor
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=np.float64))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # Box centre falls outside the grid: report the index and skip.
            # print() call form works on both Python 2 and Python 3; the
            # original `print cell_ind` statement is a SyntaxError on 3.
            print(cell_ind)
            continue
        a = anchor_inds[i]

        _iou_mask[cell_ind, a, :] = cfg.object_scale
        _ious[cell_ind, a, :] = anchor_ious[a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # Express width / height relative to the matched anchor.
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # Clamp before the log so tiny or zero ratios do not produce -inf.
    _boxes[:, :, 2:4] = np.maximum(_boxes[:, :, 2:4], 0.001)
    _boxes[:, :, 2:4] = np.log(_boxes[:, :, 2:4])

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask
def _process_batch(data):
    """Build YOLO-style training targets and loss weights for one image.

    Variant that receives the input size and the configuration inside
    ``data`` and assumes a fixed cell size of 32 input pixels.  Matched slots
    are weighted by ``iou_truth - iou_pred`` (IoU) and by
    ``coord_scale * (2 - w * h)`` (box), where ``w`` / ``h`` are the box size
    as a fraction of the input size.  Ground-truth boxes with class ``-1``
    are skipped.

    Args:
        data: Sequence ``(bbox_pred_np, gt_boxes, gt_classes, iou_pred_np,
            inp_size, cfg)``.  ``bbox_pred_np`` has shape
            ``(hw, num_anchors, K)``.  ``gt_boxes`` must expose ``.shape``
            and its rows are read as ``(x1, y1, x2, y2)`` (assumed to be in
            input-image pixels and non-empty -- TODO confirm against the
            caller).  ``gt_classes[i]`` is the class index of box ``i``.
            ``iou_pred_np`` holds predicted IoU scores indexed like the
            ``(hw, num_anchors, 1)`` masks.  ``inp_size`` is indexed as
            ``(width, height)`` and must support ``inp_size / 32``
            (presumably a NumPy array -- TODO confirm).  ``cfg`` is a mapping
            with keys ``num_classes``, ``anchors``, ``iou_thresh``,
            ``noobject_scale``, ``object_scale``, ``coord_scale`` and
            ``class_scale``.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays with shapes ``(hw, num_anchors, 4)``,
        ``(hw, num_anchors, 1)``, ``(hw, num_anchors, num_classes)`` and
        three times ``(hw, num_anchors, 1)``.
    """
    bbox_pred_np, gt_boxes, gt_classes, iou_pred_np, inp_size, cfg = data

    # One output cell covers `stride` input pixels in each direction.
    stride = 32
    out_size = inp_size / stride
    num_gt = gt_boxes.shape[0]
    cell_w = stride
    cell_h = stride

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape  # hw = num_cell

    # Targets and loss weights, one entry per (cell, anchor); everything
    # starts at zero in this variant (no default box target or box weight).
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    _classes = np.zeros([hw, num_anchors, cfg['num_classes']],
                        dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg['anchors'], dtype=np.float64)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors, out_size[1], out_size[0])
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x by w
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y by h

    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=np.float64),
        np.ascontiguousarray(gt_boxes_b, dtype=np.float64))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # Slots that overlap no ground-truth box well are pushed towards a
    # predicted IoU of zero.
    noobj = best_ious < cfg['iou_thresh']
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg['noobject_scale'] * iou_penalty

    ious_reshaped = np.reshape(ious, [hw, num_anchors, num_gt])

    # locate the cell of each gt_box
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * out_size[0] + np.floor(cx)
    cell_inds = cell_inds.astype(int)  # np.int alias was removed as well

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    target_boxes[:, 0] = cx - np.floor(cx)  # cx (0 ~ 1)
    target_boxes[:, 1] = cy - np.floor(cy)  # cy (0 ~ 1)
    # Box width / height expressed in output-grid cells.
    target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / cell_w
    target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / cell_h

    # for each gt box, match the best anchor
    # gt_boxes_resize = [(xmin, ymin, xmax, ymax)]  unit: cell px
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] /= cell_w
    gt_boxes_resize[:, 1::2] /= cell_h
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=np.float64))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    # for every gt cell
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # Box centre falls outside the grid: report it and skip the box.
            print('warning: invalid cell_ind, cx, cy, W, H', cell_ind, cx[i],
                  cy[i], out_size[0], out_size[1])
            continue
        a = anchor_inds[i]

        # do not evaluate for dontcare / unknown class
        if gt_classes[i] == -1:
            continue

        iou_pred = iou_pred_np[cell_ind, a, :]
        # 0 ~ 1, should be close to iou_truth
        iou_truth = ious_reshaped[cell_ind, a, i]
        _iou_mask[cell_ind, a, :] = (
            cfg['object_scale'] * (iou_truth - iou_pred))
        _ious[cell_ind, a, :] = iou_truth

        # Box size as a fraction of the input; smaller boxes get a larger
        # coordinate weight through the (2 - w * h) factor.
        truth_w = (gt_boxes_b[i, 2] - gt_boxes_b[i, 0]) / inp_size[0]
        truth_h = (gt_boxes_b[i, 3] - gt_boxes_b[i, 1]) / inp_size[1]
        _box_mask[cell_ind, a, :] = (
            cfg['coord_scale'] * (2 - truth_w * truth_h))
        # Express width / height relative to the matched anchor.
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg['class_scale']
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # _boxes = (sig(tx), sig(ty), exp(tw), exp(th)); no log transform here.
    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask
def _process_batch(data, size_index):
    """Build YOLO-style training targets and loss weights for one image.

    Every (cell, anchor) slot starts with default targets.  Slots whose best
    IoU against any ground-truth box is at or below ``cfg.iou_thresh`` get a
    no-object IoU weight.  For each ground-truth box, the slot made of the
    cell containing the box centre and the anchor with the highest
    ``anchor_intersections`` score receives box, IoU and class targets.

    Args:
        data: Sequence ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``.  ``bbox_pred_np`` has shape
            ``(hw, num_anchors, K)``.  ``gt_boxes`` rows are read as
            ``(x1, y1, x2, y2)`` (assumed to be in input-image pixels and
            non-empty -- TODO confirm against the caller).  ``gt_classes[i]``
            is used as the class index of box ``i``.  ``dontcares`` is
            unpacked but unused.  ``iou_pred_np`` holds the predicted IoU
            scores and is indexed like the ``(hw, num_anchors, 1)`` masks.
        size_index: Index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size``.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays with shapes ``(hw, num_anchors, 4)``,
        ``(hw, num_anchors, 1)``, ``(hw, num_anchors, cfg.num_classes)`` and
        three times ``(hw, num_anchors, 1)``.
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # np.float64 / int replace the np.float / np.int aliases, which NumPy
    # 1.24 removed.
    float_t = np.float64
    slot_shape = [hw, num_anchors, 1]

    # Targets and loss weights, one entry per (cell, anchor).
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=float_t)
    _class_mask = np.zeros(slot_shape, dtype=float_t)
    _ious = np.zeros(slot_shape, dtype=float_t)
    _iou_mask = np.zeros(slot_shape, dtype=float_t)
    _boxes = np.zeros([hw, num_anchors, 4], dtype=float_t)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    # Unmatched slots keep a small, non-zero box weight.
    _box_mask = np.zeros(slot_shape, dtype=float_t) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float_t)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float_t),
        anchors, H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=float_t)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=float_t),
        np.ascontiguousarray(gt_boxes_b, dtype=float_t))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)

    # One mask for both the gather and the scatter.  The original gathered
    # the penalty with `<` but assigned with `<=`; a best IoU exactly equal
    # to the threshold then selected different numbers of elements and the
    # assignment could fail with a shape mismatch.
    noobj = best_ious <= cfg.iou_thresh
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg.noobject_scale * iou_penalty

    # locate the cell of each gt_box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float_t)
    target_boxes[:, 0] = cx - np.floor(cx)  # centre x offset inside the cell
    target_boxes[:, 1] = cy - np.floor(cy)  # centre y offset inside the cell
    # Box width / height expressed in output-grid cells.
    target_boxes[:, 2] = (
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0])
    target_boxes[:, 3] = (
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1])

    # for each gt box, match the best anchor
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=float_t))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # Box centre falls outside the grid: report it and skip the box.
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        # 0 ~ 1, should be close to 1
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = (
            cfg.object_scale * (1 - iou_pred_cell_anchor))
        # IoU between this slot's predicted box and ground-truth box i.
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # Express width / height relative to the matched anchor.
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # Width / height targets are left as ratios; no log transform is applied.
    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask
def _process_batch(data, size_index):
    """Build YOLO-style training targets and loss weights for one image.

    Variant with constant loss weights: the IoU mask is
    ``cfg.noobject_scale`` for slots whose best IoU is at or below
    ``cfg.iou_thresh`` and ``cfg.object_scale`` for matched slots.  The
    predicted IoU is not used.  All targets and the box mask start at zero.

    Args:
        data: Sequence ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``.  ``bbox_pred_np`` has shape
            ``(hw, num_anchors, K)``.  ``gt_boxes`` rows are read as
            ``(x1, y1, x2, y2)`` (assumed to be in input-image pixels and
            non-empty -- TODO confirm against the caller).  ``gt_classes[i]``
            is used as the class index of box ``i``.  ``dontcares`` and
            ``iou_pred_np`` are unpacked but unused.
        size_index: Index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size``.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays with shapes ``(hw, num_anchors, 4)``,
        ``(hw, num_anchors, 1)``, ``(hw, num_anchors, cfg.num_classes)`` and
        three times ``(hw, num_anchors, 1)``.
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and loss weights, one entry per (cell, anchor).
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=np.float64)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors, H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    # Map the predictions to actual box positions.
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x by the input size
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64)  # [R, 4]

    # for each cell, compare predicted_bbox and gt_bbox; the comparison is
    # done at input-image scale.
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=np.float64),
        np.ascontiguousarray(gt_boxes_b, dtype=np.float64))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # Slots whose best IoU is at or below the threshold are treated as
    # containing no object and get a constant weight (the prediction-based
    # penalty `-iou_pred` was disabled in the original).
    _iou_mask[best_ious <= cfg.iou_thresh] = cfg.noobject_scale

    # locate the cell of each gt_box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)  # np.int alias was removed as well

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    target_boxes[:, 0] = cx - np.floor(cx)  # bx - cx = sig(tx)
    target_boxes[:, 1] = cy - np.floor(cy)  # by - cy = sig(ty)
    target_boxes[:, 2] = (
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0])
        / inp_size[0] * out_size[0])  # bw
    target_boxes[:, 3] = (
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1])
        / inp_size[1] * out_size[1])  # bh

    # for each gt box, match the best anchor
    # Convert the gt boxes to feature-map scale to compare with the anchors.
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    # Original author's note: box and anchor centres are assumed to coincide
    # here (TODO confirm in anchor_intersections).
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=np.float64))
    anchor_inds = np.argmax(anchor_ious, axis=0)  # best anchor per gt box

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    # Only the cell containing each gt box, with that box's best anchor, is
    # labelled.
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # Box centre falls outside the grid: report it and skip the box.
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        # Constant object weight; the (1 - iou_pred) scaling was disabled in
        # the original.
        _iou_mask[cell_ind, a, :] = cfg.object_scale
        # IoU between this slot's predicted box and ground-truth box i.
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        target_boxes[i, 2:4] /= anchors[a]  # bw / pw, bh / ph
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # Width / height targets are left as ratios; no log transform is applied.
    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask