Ejemplo n.º 1
0
def generate_target(img_size, label, tensor_pred, num_classes=20):
    """
    Generate the target tensor of one image for the loss calculation.

    For every ground-truth box the grid cell returned by get_center_grid_of_bboxes is looked up.
    Among the B boxes predicted in that cell, the one with the highest IoU against the ground truth
    gets its confidence slot set to 1 and its four coordinate slots filled with the ground-truth box
    as encoded by translate_box_abs_to_yolo. The class slot of the cell is set to 1 as well.

    :param img_size: (height, width) of the image
    :param label: np.array, int32, absolute, shape: (N, 5), [class_id, x1, y1, x2, y2]
    :param tensor_pred: np.array, float32, (S, S, (B*5 + C)); it is only read, never modified
    :param num_classes: C, the number of class slots at the end of the last axis (default 20)
    :return: target tensor, np.array, (S, S, (B*5 + C)), dtype is the np.zeros default (float64)
    """
    tensor_targ = np.zeros(tensor_pred.shape)

    height, width = img_size
    S = int(tensor_pred.shape[0])
    B = int((tensor_pred.shape[-1] - num_classes) // 5)
    center_grids = get_center_grid_of_bboxes(img_size, label[:, 1:], S)  # (N, 2), (row, column)

    for label_idx, center_grid in enumerate(center_grids):
        grid_row, grid_col = center_grid

        # The first B*5 values of the cell are B groups of (4 coords, confidence); keep the coords.
        # The copy keeps the helpers below from ever touching the caller's tensor_pred.
        boxes_pred = tensor_pred[grid_row, grid_col, :B * 5].reshape((B, 5))[:, :4].copy()

        boxes_rel = np.empty(boxes_pred.shape)
        for i, box in enumerate(boxes_pred):
            box_abs = translate_box_yolo_to_abs(img_size, box, center_grid, S)
            boxes_rel[i] = myutils.bbox_abs_to_rel(box_abs, (height, width))

        rltv_label_box = myutils.bbox_abs_to_rel(label[label_idx, 1:], img_size)

        # box confidence: the predicted box with the highest IoU is the responsible one
        iou = mx.nd.contrib.box_iou(mx.nd.array(boxes_rel), mx.nd.array(rltv_label_box.reshape((1, 4))))
        slot = int(np.argmax(iou.asnumpy())) * 5
        tensor_targ[grid_row, grid_col, slot + 4] = 1

        # box coordinates: the encoded box is kept in its own float array instead of being written
        # back into a copy of `label`, which would truncate fractions when `label` is an int array
        gt_box = label[label_idx, 1:].copy().reshape((1, 4))
        yolo_box = np.asarray(translate_box_abs_to_yolo(img_size, gt_box, S), dtype=tensor_targ.dtype)
        tensor_targ[grid_row, grid_col, slot:slot + 4] = yolo_box.reshape(-1)

        # class probability
        tensor_targ[grid_row, grid_col, B * 5 + int(label[label_idx, 0])] = 1

    return tensor_targ
Ejemplo n.º 2
0
        def _transform_fn(*data):
            """
            Transform one (img, label) sample; meant to be passed to dataset.transform().

            The image is cast to float32 and divided by 255, run through myutils.data_augment with
            rb/rc/rh/rs set to 0.0, rflr and rcp set to False and re set to True, colour-normalised
            with myutils.mean / myutils.std and finally converted by myutils.to_tensor.
            The label is cast to float32, goes through the same data_augment call, and its box
            columns are then passed through myutils.bbox_abs_to_rel using the last two dimensions of
            the image tensor.

            NOTE(review): the previous doc described the returned label as absolute, but the code
            calls bbox_abs_to_rel on it, so it is presumably relative -- confirm with the caller.

            :param data: (img, label), img: np.array, int, (h, w, c), label: absolute, np.array, int, (N, 5)
            :return: (mx_img, mx_label), mx_img: output of myutils.to_tensor (presumably (c, h, w)),
                     mx_label: mx.nd.array, (N, 5)
            """
            raw_img, raw_label = data
            # astype() returns new arrays, so the caller's sample is left untouched
            scaled_img = raw_img.astype('float32') / 255
            float_label = raw_label.astype('float32')

            augment_options = dict(size=self.model_img_size,
                                   rb=0.0,
                                   rc=0.0,
                                   rh=0.0,
                                   rs=0.0,
                                   rflr=False,
                                   re=True,
                                   rcp=False)
            aug_img, aug_label = myutils.data_augment(scaled_img, float_label, **augment_options)

            normalized = mx.img.color_normalize(mx.nd.array(aug_img),
                                                mean=mx.nd.array(myutils.mean),
                                                std=mx.nd.array(myutils.std))
            mx_img = myutils.to_tensor(normalized)

            tensor_hw = mx_img.shape[-2:]
            aug_label[:, 1:] = myutils.bbox_abs_to_rel(aug_label[:, 1:], tensor_hw)
            return mx_img, mx.nd.array(aug_label)
Ejemplo n.º 3
0
def _hard_negative_mining(mx_img, mx_label, tensor_pred, anchors, pos_mask, neg_thresh=0.2):
    """
    Pick the hard negative anchors of one image.

    An anchor is a negative candidate when its IoU with every ground-truth box is below
    `neg_thresh` and it is not marked in `pos_mask`. The candidates are ordered by the value stored
    in channel 0 of the prediction (treated as the background confidence), lowest first, and at most
    three times as many candidates as there are positives are kept.

    :param mx_img: (1, 3, h, w); only its last two dimensions are read
    :param mx_label: (1, N, 5), N:objects in the image, 5:(cls_id, 4 box coords), absolute
    :param tensor_pred: mx.nd.array, shape:(1, P, A, C+1+4), P:number of positions, A:anchors on each position
    :param anchors: (P*A, 4), relative box coordinates
    :param pos_mask: boolean mask of the positive anchors; it is flattened before use
    :param neg_thresh: threshold of IoU for labeled as negatives
    :return: np.array, bool, (P*A, ), True for the selected hard negatives
    """
    P, A = tensor_pred.shape[1], tensor_pred.shape[2]
    hard_neg_mask = np.full((P * A, ), False)

    # negative candidates: anchors whose IoU with every ground-truth box is below the threshold
    label = mx_label.asnumpy()[0]
    label[:, 1:] = myutils.bbox_abs_to_rel(label[:, 1:], mx_img.shape[-2:])
    ious = np.asarray(gcv.utils.bbox_iou(label[:, 1:], anchors))  # (N, P*A)
    below_thresh = np.all(ious < neg_thresh, axis=0)  # all True when there is no label

    # positive indices
    pos_indices = np.where(pos_mask.flatten())[0]
    num_positive = pos_indices.size

    # separate the positive indices from the negative candidates (result is sorted and unique)
    neg_indices = np.setdiff1d(np.where(below_thresh)[0], pos_indices)

    # nothing to pick: either no candidate is left or the 3:1 budget is zero
    num_hard_negative = min(3 * num_positive, neg_indices.size)
    if num_hard_negative == 0:
        return hard_neg_mask

    # one device-to-host copy of channel 0 instead of one asscalar() call per candidate;
    # the last axis length is taken from the prediction itself rather than a fixed 25
    flat_pred = tensor_pred.reshape((1, -1, tensor_pred.shape[-1]))
    bg_conf = flat_pred[0, :, 0].asnumpy()[neg_indices]

    # sort the background confidence in rising order and keep the first ones
    order = np.argsort(bg_conf, kind='stable')
    hard_neg_mask[neg_indices[order[:num_hard_negative]]] = True

    return hard_neg_mask
Ejemplo n.º 4
0
def visualize_grids(img, label, S=7):
    """
    Draw the S x S grid, the label bounding boxes and their centre points on top of an image.

    :param img: np.array, uint8, (h, w, c)
    :param label: np.array, int32, (N, 5), one row per bbox, the 5 represents (cls_id, x1, y1, x2, y2)
    :param S: the image is divided into S * S grids
    :return: the object returned by plt.imshow(img) (a matplotlib AxesImage, not a Figure);
             everything is drawn on its `.axes`
    """
    boxes = label.reshape((-1, 5))

    fig = plt.imshow(img)
    axes = fig.axes

    height, width = img.shape[:2]
    x_interval = width / S
    y_interval = height / S

    # S + 1 vertical and S + 1 horizontal lines, drawn alternately
    for k in range(S + 1):
        x_pos = x_interval * k
        y_pos = y_interval * k
        plt.plot((x_pos, x_pos), (0, height), 'b-', linewidth=1)
        plt.plot((0, width), (y_pos, y_pos), 'b-', linewidth=1)

    # no padding between the image border and the axes limits
    axes.set_xmargin(0)
    axes.set_ymargin(0)

    for obj_label in boxes:
        abs_bbox = obj_label[1:]
        myutils._add_rectangle(axes, myutils.bbox_abs_to_rel(bbox=abs_bbox, pic_size=img.shape[:2]))

        x_center, y_center = get_center_coord_of_bboxes(abs_bbox)[0]
        plt.plot(x_center, y_center, 'r.', markersize=15)

    return fig
Ejemplo n.º 5
0
def visualize_pred(img, label, tensor_pred, num_classes=20):
    """
    Visualize the boxes predicted in the grid cell of each ground-truth box.

    For every ground-truth box the grid cell returned by get_center_grid_of_bboxes is looked up and
    all B boxes predicted in that cell are drawn in blue on top of the figure produced by
    myutils.data_visualize(img, label[:, 1:]).

    :param img: np.array, (h, w, c)
    :param label: np.array, (N, 5)
    :param tensor_pred: np.array, (S, S, B*5+C)
    :param num_classes: C, the number of class slots at the end of the last axis (default 20)
    :return: the figure returned by myutils.data_visualize, with the predicted boxes added
    """
    img_size = img.shape[:2]
    S = int(tensor_pred.shape[0])
    B = int((tensor_pred.shape[-1] - num_classes) // 5)
    center_grids = get_center_grid_of_bboxes(img_size, label[:, 1:], S)  # (N, 2), (row, column)

    # Collect the boxes in a list; growing an array with np.concatenate re-copies it every time.
    boxes_abs = []
    for center_grid in center_grids:
        grid_row, grid_col = center_grid
        for i in range(B):
            # copy so the helper can never write into the caller's tensor_pred
            box_yolo = tensor_pred[grid_row, grid_col, i * 5:i * 5 + 4].copy()
            box_abs = translate_box_yolo_to_abs(img_size, box_yolo, center_grid, S)
            boxes_abs.append(box_abs.reshape((4, )))

    fig = myutils.data_visualize(img, label[:, 1:])
    axes = fig.axes[0]
    for box in boxes_abs:
        myutils._add_rectangle(axes, myutils.bbox_abs_to_rel(box, img_size), 'blue')

    return fig
Ejemplo n.º 6
0
def _generate_target(mx_img, mx_label, anchors, do_hard_mining=False, tensor_pred=None, neg_thresh=0.2):
    """
    Build the classification and box-regression targets of one image for a set of anchors.

    An anchor is positive when it has the highest IoU with some ground-truth box, or when its IoU
    with any ground-truth box exceeds 0.2. Every positive anchor is matched to the ground-truth box
    it overlaps most; its box target is the centre offset divided by the anchor size and by 0.1,
    and the log size ratio divided by 0.2; its class target is the matched class id + 1, so that 0
    stays reserved for anchors without a match.

    NOTE(review): the original author marked this function as "needs to be reworked, but not now".
    NOTE(review): the original doc called mx_label relative, but the code passes it through
    myutils.bbox_abs_to_rel, so it is treated as absolute here -- confirm with the caller.
    NOTE(review): the original doc gave anchors as (1, P*A, 4), but the code indexes it as
    (P*A, 4) -- confirm with the caller.

    :param mx_img: mx.nd.array, (b, 3, h, w); only its last two dimensions are read
    :param mx_label: mx.nd.array, (b, N, 5) or (N, 5), 5:(cls_id, x1, y1, x2, y2);
                     for a 3-D label only the first sample is used
    :param anchors: np.array, (P*A, 4), relative, (x1, y1, x2, y2)
    :param do_hard_mining: when True, a hard-negative mask is computed as well
    :param tensor_pred: mx.nd.array, prediction forwarded to _hard_negative_mining;
                        required when do_hard_mining is True
    :param neg_thresh: IoU threshold forwarded to _hard_negative_mining
    :return: (box_target, pos_mask, cls_target) when do_hard_mining is False, otherwise
             (cls_target, box_target, pos_mask, neg_mask); the two orders differ and are kept as
             they were for existing callers. box_target: mx.nd.array (P*A, 4),
             cls_target: mx.nd.array (P*A, ), pos_mask: np.array, bool, (P*A, )
    :raises ValueError: if do_hard_mining is True and tensor_pred is None
    """
    if do_hard_mining and tensor_pred is None:
        raise ValueError("tensor_pred is required when do_hard_mining is True")

    pos_iou_thresh = 0.2  # IoU above which an anchor is positive (strategy 2)
    center_scale = 0.1    # divisor applied to the centre offsets
    size_scale = 0.2      # divisor applied to the log size ratios

    img_size = mx_img.shape[-2:]  # (height, width)
    label = mx_label.asnumpy()
    if label.ndim == 3:
        label = label[0]  # same convention as _hard_negative_mining: first sample only

    num_anchors = anchors.shape[0]
    pos_mask = np.full((num_anchors, ), False)
    box_target = np.zeros(anchors.shape, dtype=np.float32)
    cls_target = np.zeros(num_anchors, dtype=np.float32)

    if label.shape[0] > 0:
        rltv_gt_boxes = myutils.bbox_abs_to_rel(label[:, 1:].copy(), img_size)  # (M, 4)
        ious = np.asarray(gcv.utils.bbox_iou(rltv_gt_boxes, anchors))  # (M, P*A)

        # strategy 1: the best anchor of every ground-truth box is positive
        pos_mask[np.argmax(ious, axis=1)] = True
        # strategy 2: every anchor overlapping some ground-truth box enough is positive
        pos_mask |= (ious > pos_iou_thresh).any(axis=0)

        pos_idx = np.where(pos_mask)[0]
        matched_gt = np.argmax(ious[:, pos_idx], axis=0)  # best ground truth per positive anchor
        gt = rltv_gt_boxes[matched_gt]  # (K, 4)
        achr = anchors[pos_idx]  # (K, 4)

        achr_w = achr[:, 2] - achr[:, 0]
        achr_h = achr[:, 3] - achr[:, 1]
        achr_center_x = (achr[:, 0] + achr[:, 2]) / 2
        achr_center_y = (achr[:, 1] + achr[:, 3]) / 2

        gt_w = gt[:, 2] - gt[:, 0]
        gt_h = gt[:, 3] - gt[:, 1]
        gt_center_x = (gt[:, 0] + gt[:, 2]) / 2
        gt_center_y = (gt[:, 1] + gt[:, 3]) / 2

        box_target[pos_idx, 0] = (gt_center_x - achr_center_x) / achr_w / center_scale
        box_target[pos_idx, 1] = (gt_center_y - achr_center_y) / achr_h / center_scale
        box_target[pos_idx, 2] = np.log(gt_w / achr_w) / size_scale
        box_target[pos_idx, 3] = np.log(gt_h / achr_h) / size_scale

        cls_target[pos_idx] = label[matched_gt, 0] + 1

    box_target = mx.nd.array(box_target)
    cls_target = mx.nd.array(cls_target)

    if not do_hard_mining:
        return box_target, pos_mask, cls_target

    # hard negative mining
    neg_mask = _hard_negative_mining(mx_img, mx_label, tensor_pred, anchors, pos_mask, neg_thresh)

    return cls_target, box_target, pos_mask, neg_mask