def generate_target(img_size, label, tensor_pred, num_classes=20):
    """
    Generate the target tensor of one image for the loss calculation.

    For every ground-truth box the grid cell returned by
    get_center_grid_of_bboxes is looked up. Among the B boxes predicted in that
    cell, the one with the highest IoU against the ground truth gets its
    confidence slot set to 1 and its four coordinate slots set to the
    YOLO-encoded ground truth. The class slot of the cell is set to 1 as well.

    :param img_size: (h, w) of the image
    :param label: np.array, absolute, shape: (N, 5), [class_id, x1, y1, x2, y2];
                  integer labels are accepted
    :param tensor_pred: np.array, (S, S, (B*5 + C))
    :param num_classes: C, the number of class slots at the end of the last
                        axis; defaults to 20, the value that used to be hard-coded
    :return: target tensor, np.array with the dtype np.zeros produces
             (float64), shape (S, S, (B*5 + C))
    """
    # Work on a private copy; the slices taken below are views into this copy.
    tensor_pred = tensor_pred.copy()
    tensor_targ = np.zeros(tensor_pred.shape)
    height, width = img_size
    S = int(tensor_pred.shape[0])
    B = int((tensor_pred.shape[-1] - num_classes) // 5)
    center_grids = get_center_grid_of_bboxes(img_size, label[:, 1:], S)  # (N, 2), (row, column)

    for label_idx, center_grid in enumerate(center_grids):
        grid_row, grid_col = center_grid

        # box confidence: find the predictor of this cell that best overlaps
        # the ground truth
        # (B, 4): the four coordinate slots of each predicted box in the cell
        boxes_pred = tensor_pred[grid_row, grid_col, :B * 5].reshape((B, 5))[:, :4]
        boxes_rel = np.empty(boxes_pred.shape)
        for i, box in enumerate(boxes_pred):
            box_abs = translate_box_yolo_to_abs(img_size, box, center_grid, S)
            boxes_rel[i] = myutils.bbox_abs_to_rel(box_abs, (height, width))
        rltv_label_box = myutils.bbox_abs_to_rel(label[label_idx, 1:], img_size)
        iou = mx.nd.contrib.box_iou(mx.nd.array(boxes_rel),
                                    mx.nd.array(rltv_label_box.reshape((1, 4))))
        idx_max_iou = int(np.argmax(iou.asnumpy()))
        tensor_targ[grid_row, grid_col, idx_max_iou * 5 + 4] = 1

        # box coordinates
        # The helper still receives the label in its original dtype, but the
        # encoded result is stored in a float buffer: writing it back into an
        # integer label row would silently truncate the fractional values.
        label_row = label[label_idx].copy().reshape((-1, 5))
        yolo_label = label_row.astype('float64')
        yolo_label[:, 1:] = translate_box_abs_to_yolo(img_size, label_row[:, 1:], S)
        first_slot = idx_max_iou * 5
        tensor_targ[grid_row, grid_col, first_slot:first_slot + 4] = yolo_label.flatten()[1:]

        # class probability
        tensor_targ[grid_row, grid_col, int(B * 5 + yolo_label.flatten()[0])] = 1
    return tensor_targ
def _transform_fn(*data):
    """
    Transform callback intended to be passed to dataset.transform().

    The image is scaled by 1/255, run through myutils.data_augment together
    with the label, colour-normalized with myutils.mean / myutils.std and then
    converted with myutils.to_tensor. The box columns of the label are passed
    through myutils.bbox_abs_to_rel using the last two dimensions of the
    converted image, so (going by that helper's name) the returned boxes are
    relative coordinates.

    NOTE: `self` is not a parameter of this function; `self.model_img_size`
    is resolved from the enclosing scope.

    :param data: (img, label), img: np.array, int, (h, w, c),
                 label: absolute, np.array, int, (N, 5)
    :return: (mx_img, mx_label), mx_img: result of myutils.to_tensor,
             mx_label: mx.nd.array, (N, 5)
    """
    raw_img, raw_label = data

    # astype() and the division produce new float32 arrays (the original
    # comment describes this step as a deep copy of the inputs)
    float_img = raw_img.astype('float32') / 255
    float_label = raw_label.astype('float32')

    aug_img, aug_label = myutils.data_augment(
        float_img, float_label, size=self.model_img_size,
        rb=0.0, rc=0.0, rh=0.0, rs=0.0, rflr=False, re=True, rcp=False)

    img_nd = mx.nd.array(aug_img)
    channel_mean = mx.nd.array(myutils.mean)
    channel_std = mx.nd.array(myutils.std)
    normalized = mx.img.color_normalize(img_nd, mean=channel_mean, std=channel_std)
    mx_img = myutils.to_tensor(normalized)

    # rescale the box columns against the size of the final image tensor
    out_hw = mx_img.shape[-2:]
    aug_label[:, 1:] = myutils.bbox_abs_to_rel(aug_label[:, 1:], out_hw)
    return mx_img, mx.nd.array(aug_label)
def _hard_negative_mining(mx_img, mx_label, tensor_pred, anchors, pos_mask, neg_thresh=0.2):
    """
    Pick the hard negative anchors of one image.

    An anchor is a negative candidate when its IoU with every ground-truth box
    is below neg_thresh and it is not marked in pos_mask. The candidates are
    ordered by channel 0 of the prediction (called the background confidence
    in the original comments), ascending. The first 3 * (number of positives)
    of them are selected, or all of them when fewer are available.

    :param mx_img: (1, 3, h, w)
    :param mx_label: (1, N, 5), N: objects in the image, 5: class id followed
                     by four absolute box coordinates
    :param tensor_pred: mx.nd.array, shape: (1, P, A, K), P: number of
                        positions, A: anchors on each position, K: channels
                        per anchor (read from the tensor, no longer fixed to 25)
    :param anchors: (P*A, 4), relative
    :param pos_mask: boolean mask of the positive anchors, P*A entries once
                     flattened
    :param neg_thresh: threshold of IoU for labeled as negatives
    :return: np.array of bool, shape (P*A, ), True at the selected hard
             negatives; all False when there is no negative candidate
    """
    P, A = tensor_pred.shape[1], tensor_pred.shape[2]

    # negative candidates: IoU below the threshold against every ground truth
    label = mx_label.asnumpy()[0]
    label[:, 1:] = myutils.bbox_abs_to_rel(label[:, 1:], mx_img.shape[-2:])
    ious = gcv.utils.bbox_iou(label[:, 1:], anchors)  # one row per ground-truth box
    candidate_mask = np.full(anchors.shape[0], True)
    for iou in ious:
        candidate_mask &= iou < neg_thresh

    # positive indices
    pos_indices = np.where(pos_mask.flatten())[0]
    num_positive = pos_indices.size

    # Drop the positives from the candidates. setdiff1d returns a sorted index
    # array, so ties in confidence are resolved reproducibly.
    neg_indices = np.setdiff1d(np.where(candidate_mask)[0], pos_indices)

    # Channel 0 of every anchor, fetched with a single device-to-host copy
    # instead of one asscalar() call per anchor.
    num_channels = tensor_pred.shape[-1]
    bg_conf = tensor_pred.asnumpy().reshape((-1, num_channels))[:, 0]
    ascending = np.argsort(bg_conf[neg_indices])

    # pick hard negatives: at most three per positive
    num_hard_negative = min(3 * num_positive, neg_indices.size)
    hard_neg_idx = neg_indices[ascending[:num_hard_negative]]

    # generate mask (an empty selection simply leaves the mask all False)
    neg_mask = np.full((P * A, ), False)
    neg_mask[hard_neg_idx] = True
    return neg_mask
def visualize_grids(img, label, S=7):
    """
    Draw the S * S grid, the label boxes and a marker per label on an image.

    :param img: np.array, uint8, (h, w, c)
    :param label: np.array, int32, (N, 5), one row per bbox,
                  the 5 columns are (cls_id, x1, y1, x2, y2)
    :param S: the image is divided by S * S grids
    :return: the object returned by plt.imshow(img); its `.axes` attribute is
             the axes everything is drawn on
    """
    boxes = label.reshape((-1, 5))
    image_artist = plt.imshow(img)
    ax = image_artist.axes

    img_h, img_w = img.shape[:2]
    cell_w = img_w / S
    cell_h = img_h / S

    # For every grid index: one vertical line, then one horizontal line.
    segments = []
    for k in range(S + 1):
        segments.append(([cell_w * k, 0], [cell_w * k, img_h]))
        segments.append(([0, cell_h * k], [img_w, cell_h * k]))
    for (x_from, y_from), (x_to, y_to) in segments:
        plt.plot((x_from, x_to), (y_from, y_to), 'b-', linewidth=1)

    ax.set_xmargin(0)
    ax.set_ymargin(0)

    for row in boxes:
        corners = row[1:]
        rel_box = myutils.bbox_abs_to_rel(bbox=corners, pic_size=img.shape[:2])
        myutils._add_rectangle(ax, rel_box)
        # red dot at the point reported by get_center_coord_of_bboxes
        cx, cy = get_center_coord_of_bboxes(corners)[0]
        plt.plot(cx, cy, 'r.', markersize=15)
    return image_artist
def visualize_pred(img, label, tensor_pred, num_classes=20):
    """
    Visualize the comparable boxes predicted for each ground-truth box.

    For every label the grid cell returned by get_center_grid_of_bboxes is
    looked up. All B boxes predicted in that cell are drawn in blue on top of
    the figure produced by myutils.data_visualize.

    :param img: np.array, (h, w, c)
    :param label: np.array, (N, 5)
    :param tensor_pred: np.array, (S, S, B*5+C)
    :param num_classes: C, the number of class slots at the end of the last
                        axis; defaults to 20, the value that used to be hard-coded
    :return: the figure that plots on
    """
    img_size = img.shape[:2]
    S = int(tensor_pred.shape[0])
    B = int((tensor_pred.shape[-1] - num_classes) // 5)
    center_grids = get_center_grid_of_bboxes(img_size, label[:, 1:], S)  # (N, 2), (row, column)

    # Collect the boxes in a list rather than re-concatenating an array for
    # every box, which copied all previously gathered boxes each time.
    boxes_pstv = []
    for center_grid in center_grids:
        grid_row, grid_col = center_grid
        # (B, 4) coordinate slots of the cell; copied so the helper below never
        # receives a view into the caller's tensor
        cell_boxes = tensor_pred[grid_row, grid_col, :B * 5].reshape((B, 5))[:, :4].copy()
        for box_yolo in cell_boxes:
            box_abs = translate_box_yolo_to_abs(img_size, box_yolo, center_grid, S)
            boxes_pstv.append(box_abs.reshape((4, )))

    fig = myutils.data_visualize(img, label[:, 1:])
    axes = fig.axes[0]
    for box in boxes_pstv:
        box_rel = myutils.bbox_abs_to_rel(box, img_size)
        myutils._add_rectangle(axes, box_rel, 'blue')
    return fig
def _generate_target(mx_img, mx_label, anchors, do_hard_mining=False, tensor_pred=None, neg_thresh=0.2):
    """
    Build the box and class training targets for a set of anchors.

    This function needs to be revised, but not right now.

    NOTE(review): the body reads `label`, `img_size` and `anchor`, none of
    which is assigned in this function. Unless they exist at module level the
    call fails with NameError. The commented-out lines below suggest `label`
    and `img_size` came from an earlier signature; `anchor` looks like a typo
    for `anchors`. Confirm before relying on this function.

    :param mx_img: mx.nd.array, (b, 3, h, w)
    :param mx_label: mx.nd.array, (b, N, 5), relative
                     (NOTE(review): the body passes the boxes through
                     bbox_abs_to_rel, which suggests absolute input - confirm)
    :param anchors: np.array, (1, P*A, 4), relative
                    (NOTE(review): the body uses anchors.shape[0] as the number
                    of anchors and anchors[i] as a single box, i.e. it treats
                    the array as (P*A, 4) - confirm)
    :param do_hard_mining: when true, _hard_negative_mining is called and its
                           mask is returned as well
    :param tensor_pred: mx.nd.array, (b, P*A, C+1+4), relative; only forwarded
                        to _hard_negative_mining
    :param neg_thresh: forwarded to _hard_negative_mining
    :return: (box_target, pos_mask, cls_target) when do_hard_mining is false,
             (cls_target, box_target, pos_mask, neg_mask) otherwise - note that
             the two paths differ in both length and order
    """
    height, width = mx_img.shape[-2:]
    # leftovers from an earlier version, kept for reference:
    # label = mx_labels[0, :, :].asnumpy()
    # height, width = img_size
    # NOTE(review): gt_h_w and scale are computed here but not read again in this function
    gt_h_w = mx.nd.array([mx_label[:, 4] - label[:, 2], label[:, 3] - label[:, 1]])  # the height and width of ground truth boxes
    gt_h_w = gt_h_w.transpose((1, 0))  # (M, 2), 2:(height, width)
    scale = (gt_h_w[:, 0] * gt_h_w[:, 1]) / (height * width)  # (M, )
    pos_mask = np.full((anchors.shape[0], ), False)  # (N, ), one flag per anchor
    for gt_box in label[:, 1:]:  # gt_box shape: (4, )
        # strategy 1: mark the anchor with the highest IoU for this ground-truth box
        rltv_gt_box = myutils.bbox_abs_to_rel(gt_box.reshape((-1, 4)), img_size)  # (1, 4)
        ious = gcv.utils.bbox.bbox_iou(rltv_gt_box.asnumpy(), anchors)  # (1, N)
        # ious = mx.nd.contrib.box_iou(mx.nd.array(rltv_gt_box), mx.nd.array(anchor))
        ious = mx.nd.array(ious)
        max_iou_idx = mx.nd.argmax(ious)
        pos_mask[max_iou_idx] = True
        # strategy 2: additionally mark every anchor whose IoU exceeds 0.2
        ious = gcv.utils.bbox_iou(rltv_gt_box.asnumpy(), anchors)
        ious = ious[0]  # (N, )
        # NOTE(review): from here on pos_mask is whatever mx.nd.where returns,
        # no longer the NumPy array created above - confirm this is intended
        pos_mask = mx.nd.where(ious > 0.2, mx.nd.full(pos_mask.shape, True), pos_mask)
        # pos_mask[np.where(ious > 0.2)] = True
    mask_not_zero_idx = np.where(pos_mask == True)[0]  # indices of the positive anchors
    box_target = mx.nd.zeros(anchors.shape)
    cls_target = mx.nd.zeros(anchors.shape[0])
    for anchor_idx in mask_not_zero_idx:
        achr = anchors[anchor_idx]  # (4, )
        # match the anchor with the ground-truth box it overlaps most
        rltv_gt_boxes = myutils.bbox_abs_to_rel(label[:, 1:], img_size)
        ious = mx.nd.contrib.box_iou(mx.nd.array(rltv_gt_boxes), mx.nd.array(achr.reshape((-1, 4))))
        ious = ious.asnumpy().flatten()  # (M, )
        max_iou_idx = np.argmax(ious)
        rltv_gt_box = rltv_gt_boxes[max_iou_idx]  # (4, )
        # centers and sizes; elements 0/2 are treated as x and 1/3 as y
        achr_center_x = np.mean(achr[[0, 2]])
        achr_center_y = np.mean(achr[[1, 3]])
        achr_h = achr[3] - achr[1]
        achr_w = achr[2] - achr[0]
        gt_center_x = np.mean(rltv_gt_box[[0, 2]])
        gt_center_y = np.mean(rltv_gt_box[[1, 3]])
        gt_h = rltv_gt_box[3] - rltv_gt_box[1]
        gt_w = rltv_gt_box[2] - rltv_gt_box[0]
        # center offsets are divided by the anchor size and by 0.1,
        # log size ratios are divided by 0.2
        box_target[anchor_idx, 0] = (gt_center_x - achr_center_x) / achr_w / 0.1
        box_target[anchor_idx, 1] = (gt_center_y - achr_center_y) / achr_h / 0.1
        box_target[anchor_idx, 2] = np.log(gt_w / achr_w) / 0.2
        box_target[anchor_idx, 3] = np.log(gt_h / achr_h) / 0.2
        # class id shifted by one; unmatched anchors keep 0 (presumably the background class - confirm)
        cls_target[anchor_idx] = label[max_iou_idx, 0] + 1  # (N, )
    if not do_hard_mining:
        return box_target, pos_mask, cls_target
    # hard negative mining
    # NOTE(review): `anchor` is not defined in this function; `anchors` is probably meant
    neg_mask = _hard_negative_mining(mx_img, mx_label, tensor_pred, anchor, pos_mask, neg_thresh)
    return cls_target, box_target, pos_mask, neg_mask