Example #1
def lane_detection(ori_image, mean, std, input_size, nnet, point=True):
    image = cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB)
    height, width = image.shape[0:2]
    images = np.zeros((1, 3, input_size[0], input_size[1]), dtype=np.float32)
    masks = np.ones((1, 1, input_size[0], input_size[1]), dtype=np.float32)
    orig_target_sizes = torch.tensor(input_size).unsqueeze(0).cuda()
    pad_image = image.copy()
    pad_mask = np.zeros((height, width, 1), dtype=np.float32)
    resized_image = cv2.resize(pad_image, (input_size[1], input_size[0]))
    resized_mask = cv2.resize(pad_mask, (input_size[1], input_size[0]))
    masks[0][0] = resized_mask.squeeze()
    resized_image = resized_image / 255.
    normalize_(resized_image, mean, std)
    resized_image = resized_image.transpose(2, 0, 1)
    images[0] = resized_image
    images = torch.from_numpy(images).cuda(non_blocking=True)
    masks = torch.from_numpy(masks).cuda(non_blocking=True)
    torch.cuda.synchronize(0)  # 0 is the GPU id
    outputs, _ = nnet.test([images, masks])
    torch.cuda.synchronize(0)  # 0 is the GPU id
    results = PostProcess(outputs, orig_target_sizes)

    pred = results[0].cpu().numpy()
    img = pad_image
    img_h, img_w, _ = img.shape
    pred = pred[pred[:, 0].astype(int) == 1]
    # overlay = np.zeros_like(img, np.uint8)
    overlay_rgb = img.copy()
    point_xy = []
    for i, lane in enumerate(pred):
        lane = lane[1:]  # remove conf
        lower, upper = lane[0], lane[1]
        lane = lane[2:]  # remove upper, lower positions

        # generate points from the polynomial
        ys = np.linspace(lower, upper, num=100)
        points = np.zeros((len(ys), 2), dtype=np.int32)
        points[:, 1] = (ys * img_h).astype(int)
        points[:, 0] = ((lane[0] / (ys - lane[1])**2 + lane[2] /
                         (ys - lane[1]) + lane[3] + lane[4] * ys - lane[5]) *
                        img_w).astype(int)
        points = points[(points[:, 0] > 0) & (points[:, 0] < img_w)]
        point_xy.append(points)
        if point:
            for xxx, yyy in points:
                # cv2.circle(overlay, (xxx, yyy), 1, color=WHITE, thickness=1)
                cv2.circle(overlay_rgb, (xxx, yyy),
                           1,
                           color=GREEN,
                           thickness=1)
        else:
            for current_point, next_point in zip(points[:-1], points[1:]):
                # overlay = cv2.line(overlay, tuple(current_point), tuple(next_point), color=WHITE, thickness=1)
                overlay_rgb = cv2.line(overlay_rgb,
                                       tuple(current_point),
                                       tuple(next_point),
                                       color=GREEN,
                                       thickness=1)
    return overlay_rgb, point_xy
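For reference, the expression in the loop above evaluates a six-parameter curve mapping normalized row positions to normalized column positions: x = lane[0]/(y - lane[1])**2 + lane[2]/(y - lane[1]) + lane[3] + lane[4]*y - lane[5]. A minimal standalone sketch with made-up coefficients (the values below are illustrative only, not from any trained model):

import numpy as np

lane = np.array([0.002, -0.1, 0.05, 0.4, 0.1, 0.02])  # illustrative coefficients
img_h, img_w = 720, 1280
ys = np.linspace(0.6, 1.0, num=100)                    # normalized row positions
xs = (lane[0] / (ys - lane[1])**2 + lane[2] / (ys - lane[1])
      + lane[3] + lane[4] * ys - lane[5])              # normalized column positions
points = np.stack([(xs * img_w).astype(int), (ys * img_h).astype(int)], axis=1)
points = points[(points[:, 0] > 0) & (points[:, 0] < img_w)]  # keep in-frame points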
Example #2
def image_preprocess(db, cfg_file, db_inds, scales, result_dir, debug, no_flip, im_queue):
    num_images = db_inds.size
    
    for ind in range(0, num_images):
        db_ind = db_inds[ind]

        image_id   = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image      = cv2.imread(image_file)

        height, width = image.shape[0:2]

        for scale in scales:
            new_height = int(height * scale)
            new_width  = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])
            
            if 'DLA' in cfg_file:
                inp_height = (new_height | 31) + 1
                inp_width  = (new_width | 31) + 1
            else:
                inp_height = new_height | 127
                inp_width  = new_width | 127
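            # x | 31 sets the low 5 bits, so (x | 31) + 1 is the next multiple
            # of 32; likewise (x | 127) + 1 is a multiple of 128. This pads the
            # input so the stride-4 output dimensions divide evenly.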

            images  = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios  = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes   = np.zeros((1, 2), dtype=np.float32)
            
            if 'DLA' in cfg_file:
                out_height, out_width = inp_height // 4, inp_width // 4
            else:
                out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            
            height_ratio = out_height / inp_height
            width_ratio  = out_width  / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0]  = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0]   = [int(height * scale), int(width * scale)]
            ratios[0]  = [height_ratio, width_ratio]       

            if not no_flip:
                images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            
            im_queue.put([images, ratios, borders, sizes, out_width, image_id])
            
    time.sleep(num_images * 10)
Example #3
    def __getitem__(self, idx):
        hr_img = cv2.imread(self.hr_lists[idx]).astype(np.float32)
        lr_img = cv2.imread(self.lr_lists[idx]).astype(np.float32)
        file_name = os.path.basename(self.hr_lists[idx]).split('.')[0]

        if self.need_patch:
            patch_pair = get_patch(hr_img, lr_img, self.patch_size, self.scale)
        else:
            patch_pair = {'hr_patch': hr_img, 'lr_patch': lr_img}

        # augment the dataset
        if self.aug:
            patch_pair = augment(patch_pair)

        # normalization
        # if self.normalization == 0:
        #     pass
        # elif self.normalization == 1:
        #     patch_pair['hr_patch'] = patch_pair['hr_patch'] / 255.0
        #     patch_pair['lr_patch'] = patch_pair['lr_patch'] / 255.0
        # else:
        #     raise NotImplementedError
        patch_pair['hr_patch'] = normalize_(patch_pair['hr_patch'],
                                            type=self.normalization)
        patch_pair['lr_patch'] = normalize_(patch_pair['lr_patch'],
                                            type=self.normalization)

        patch_pair['hr_patch'] = np.transpose(patch_pair['hr_patch'],
                                              (2, 0, 1)).astype(np.float32)
        patch_pair['lr_patch'] = np.transpose(patch_pair['lr_patch'],
                                              (2, 0, 1)).astype(np.float32)

        patch_pair['hr_patch'] = torch.from_numpy(patch_pair['hr_patch'])
        patch_pair['lr_patch'] = torch.from_numpy(patch_pair['lr_patch'])

        return {
            'hr': patch_pair['hr_patch'],
            'lr': patch_pair['lr_patch'],
            'fn': file_name
        }
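A typical way to consume a dataset exposing this __getitem__ (sr_dataset below stands in for an instance of the surrounding dataset class, whose name is not shown here):

from torch.utils.data import DataLoader

loader = DataLoader(sr_dataset, batch_size=16, shuffle=True, num_workers=4)
for batch in loader:
    # default collate stacks 'hr'/'lr' into (B, C, H, W) tensors and
    # gathers 'fn' into a list of file-name strings
    hr, lr, names = batch['hr'], batch['lr'], batch['fn']
    print(hr.shape, lr.shape, names[0])
    break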
Example #4
    def compute_gn_loss(self, f_t, fb, ub, train_or_val):
        '''
        f_t: target features F_a(ua)
        fb: feature map b, BxCxHxW
        ub: pos matches of ua in b
        '''
        # compute start point and its feature
        ub = ub.to(device)
        B, N, _ = ub.shape
        # uniformly sample a perturbation from interval [-1,1]
        xs = torch.FloatTensor(ub.shape).uniform_(-1,1).to(device) + ub
        f_s = extract_features(fb, xs)
        # compute residual
        f_t = normalize_(f_t)
        f_s = normalize_(f_s)

        r = f_s - f_t
        # compute Jacobian
        f_s_gx, f_s_gy = np_gradient_filter(fb)  
        J_xs_x = extract_features(f_s_gx, xs)
        J_xs_y = extract_features(f_s_gy, xs)
        J = torch.stack([J_xs_x, J_xs_y], dim=-1)  

        # compute Hessian
        eps = 1e-9  # for invertibility
        H = (J.transpose(1, 2) @ J + eps * batched_eye_like(J, J.shape[2]))
        b = J.transpose(1, 2) @ r[..., None]
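        # one Gauss-Newton step from the perturbed start xs:
        # x* = xs - H^(-1) J^T r, the minimizer of the local quadratic model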
        miu = xs.reshape(B * N, 2, 1) - torch.inverse(H) @ b
        # first error term
        e1 = 0.5 * ((ub.reshape(B * N, 2, 1) - miu).transpose(1, 2)).type(torch.float32) @ H @ \
            (ub.reshape(B * N, 2, 1) - miu).type(torch.float32)
        e1 = torch.sum(e1)
        # second error term
        det_H = torch.clamp(torch.det(H), min=1e-16)
        log_det = torch.log(det_H).to(device)
        e2 = B * N * torch.log(torch.tensor(2 * np.pi)).to(device) - 0.5 * log_det.sum(-1).to(device)
        # e = e1 + 2 * e2 / 7
        e = self.e1_lamda * e1 + self.e2_lamda * e2
        return e, e1, e2
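batched_eye_like is defined elsewhere in this codebase; a minimal implementation consistent with its use above (a batch of n x n identity matrices matching the reference tensor's dtype and device) could look like:

import torch

def batched_eye_like(x: torch.Tensor, n: int) -> torch.Tensor:
    # returns a (x.shape[0], n, n) stack of identities on x's device/dtype
    return torch.eye(n, dtype=x.dtype, device=x.device).expand(x.shape[0], n, n)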
Example #5
def get_input_image(args):
    img = cv2.imread(args.test_file)
    file_name = os.path.basename(args.test_file).split('.')[0]

    # if args.normalization == 0:
    #     pass
    # elif args.normalization == 1:
    #     img = np.float32(img) / 255.0
    # else:
    #     raise NotImplementedError
    img = normalize_(img, type=args.normalization)

    img = np.transpose(img, (2, 0, 1)).astype(np.float32)
    img = torch.from_numpy(img)
    img = img.unsqueeze(0)

    return {'img': img, 'fn': file_name}
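Calling this only requires an object carrying test_file and normalization attributes, e.g. an argparse namespace (the file name below is illustrative):

import argparse

args = argparse.Namespace(test_file='demo.png', normalization=1)
sample = get_input_image(args)
print(sample['img'].shape, sample['fn'])  # torch.Size([1, 3, H, W]), 'demo'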
Example #6
def kp_detection_image(image, db: LV, nnet: NetworkFactory,
                       debug=False, decode_func=kp_decode, db_ind=None,
                       debug_dir=None):
    """对单张图做detection

    :param image: 使用cv2.imread读入的图
    :param db:
    :param nnet:
    :param debug:
    :param decode_func:
    :param db_ind:
    :param debug_dir:
    :return: {[1-5] -> (该类中检测到的数目, 5)}, 分别为tl_xs, tl_ys, br_xs, br_ys, scores
    """
    if debug and (db_ind is None or debug_dir is None):
        raise ValueError(
            "db_ind and debug_dir should be specified when debug is turned on")

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    height, width = image.shape[0:2]

    detections = []
    center_points = []

    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        # bitwise OR with 127 pads the size so that (size + 1) is a multiple of 128
        inp_height = new_height | 127
        inp_width = new_width | 127

        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)

        # (inp_height + 1) and (inp_width + 1) are guaranteed to be divisible by 4
        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width

        # first resize by the current scale
        resized_image = cv2.resize(image, (new_width, new_height))
        # then crop around the scaled image's center to (inp_height, inp_width);
        # since inp_height/inp_width are at least new_height/new_width, this
        # effectively enlarges the canvas and pads it with black borders.
        resized_image, border, offset = crop_image(
            resized_image, new_center, [inp_height, inp_width])

        resized_image = resized_image / 255.
        normalize_(resized_image, db.mean, db.std)

        # resized_image is (H, W, C); transpose to (C, H, W) for PyTorch
        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        # sizes records the extent of actual content; resized_image itself is [inp_height, inp_width]
        sizes[0] = [int(height * scale), int(width * scale)]
        # ratio of output size to input size
        ratios[0] = [height_ratio, width_ratio]

        # batch the image together with its horizontally flipped copy
        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        # dets: (batch, 2 * num_dets, 8)
        # center: (batch, 2 * K, 4)
        dets, center = decode_func(nnet, images, K,
                                   ae_threshold=ae_threshold,
                                   kernel=nms_kernel)
        dets = dets.reshape(2, -1, 8)
        center = center.reshape(2, -1, 4)
        # map detections on the flipped image back onto the original
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
        center[1, :, [0]] = out_width - center[1, :, [0]]
        dets = dets.reshape(1, -1, 8)  # (1, 2 * num_dets, 8)
        center = center.reshape(1, -1, 4)  # (1, 2 * K, 4)

        # remove boxes that are invalid in the original image
        _rescale_dets(dets, ratios, borders, sizes)

        center[..., [0]] /= ratios[:, 1][:, None, None]
        center[..., [1]] /= ratios[:, 0][:, None, None]
        center[..., [0]] -= borders[:, 2][:, None, None]
        center[..., [1]] -= borders[:, 0][:, None, None]
        np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None],
                out=center[..., [0]])
        np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None],
                out=center[..., [1]])

        # restore coordinates in the original image
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale

        # center points are only used when scale == 1
        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    # merge detections from all scales
    detections = np.concatenate(detections, axis=1)         # (1, 2 * num_dets * len(scales), 8)
    center_points = np.concatenate(center_points, axis=1)   # (1, 2 * K, 4)

    classes = detections[..., -1]
    classes = classes[0]            # (2 * num_dets * len(scales),)
    detections = detections[0]      # (2 * num_dets * len(scales), 8)
    center_points = center_points[0]    # (2 * K, 4)

    # collect all valid candidate boxes
    valid_ind = detections[:, 4] > -1
    valid_detections = detections[valid_ind]    # (num_valid, 8)

    box_width = valid_detections[:, 2] - valid_detections[:, 0]     # (num_valid,)
    box_height = valid_detections[:, 3] - valid_detections[:, 1]    # (num_valid,)

    # small vs. large candidate boxes
    s_ind = (box_width * box_height <= 22500)
    l_ind = (box_width * box_height > 22500)

    s_detections = valid_detections[s_ind]  # (num_small, 8)
    l_detections = valid_detections[l_ind]  # (num_large, 8)

    # small boxes: check whether the central region contains a center point;
    # a single same-class center point suffices, and the highest score is used
    s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
    s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
    s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
    s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

    s_temp_score = copy.copy(s_detections[:, 4])
    s_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    s_left_x = s_left_x[np.newaxis, :]
    s_right_x = s_right_x[np.newaxis, :]
    s_top_y = s_top_y[np.newaxis, :]
    s_bottom_y = s_bottom_y[np.newaxis, :]

    ind_lx = (center_x - s_left_x) > 0
    ind_rx = (center_x - s_right_x) < 0
    ind_ty = (center_y - s_top_y) > 0
    ind_by = (center_y - s_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0
    ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
    index_s_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score],
        axis=0)
    s_detections[:, 4][ind_s_new_score] = \
        (s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3

    # large boxes: check whether the central region contains a center point
    l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
    l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
    l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
    l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

    l_temp_score = copy.copy(l_detections[:, 4])
    l_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    l_left_x = l_left_x[np.newaxis, :]
    l_right_x = l_right_x[np.newaxis, :]
    l_top_y = l_top_y[np.newaxis, :]
    l_bottom_y = l_bottom_y[np.newaxis, :]

    ind_lx = (center_x - l_left_x) > 0
    ind_rx = (center_x - l_right_x) < 0
    ind_ty = (center_y - l_top_y) > 0
    ind_by = (center_y - l_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][
                                                    np.newaxis, :]) == 0
    ind_l_new_score = np.max(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
         (ind_by + 0) & (ind_cls + 0)),
        axis=0) == 1
    index_l_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_l_new_score],
        axis=0)
    l_detections[:, 4][ind_l_new_score] = \
        (l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3

    # merge large- and small-box detections and sort by score
    detections = np.concatenate([l_detections, s_detections], axis=0)
    detections = detections[np.argsort(-detections[:, 4])]
    classes = detections[..., -1]

    # reject detections with negative scores
    keep_inds = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    ret = {}
    for j in range(categories):
        keep_inds = (classes == j)
        ret[j + 1] = detections[keep_inds][:, 0:7].astype(
            np.float32)
        if merge_bbox:
            soft_nms_merge(ret[j + 1], Nt=nms_threshold,
                           method=nms_algorithm, weight_exp=weight_exp)
        else:
            soft_nms(ret[j + 1], Nt=nms_threshold,
                     method=nms_algorithm)
        ret[j + 1] = ret[j + 1][:, 0:5]

    scores = np.hstack([
        ret[j][:, -1]
        for j in range(1, categories + 1)
    ])
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (ret[j][:, -1] >= thresh)
            ret[j] = ret[j][keep_inds]

    if debug:
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        im = image[:, :, (2, 1, 0)]
        fig, ax = plt.subplots(figsize=(12, 12))
        fig = ax.imshow(im, aspect='equal')
        plt.axis('off')
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        # bboxes = {}
        for j in range(1, categories + 1):
            keep_inds = (ret[j][:, -1] >= 0.4)      # score threshold used when drawing
            cat_name = db.class_name(j)
            for bbox in ret[j][keep_inds]:
                score = bbox[4]
                bbox = bbox[0:4].astype(np.int32)
                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]
                # if (xmax - xmin) * (ymax - ymin) > 5184:
                ax.add_patch(
                    plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                  fill=False, edgecolor=colours[j - 1],
                                  linewidth=4.0))
                ax.text(xmin + 1, ymin - 3, '{} {:.3f}'.format(cat_name, score),
                        bbox=dict(facecolor=colours[j - 1], ec='black',
                                  lw=2, alpha=0.5),
                        fontsize=15, color='white', weight='bold')

        # debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))
        debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
        # plt.savefig(debug_file1)
        plt.savefig(debug_file2, bbox_inches='tight', pad_inches=0)
        plt.close()
        # cv2.imwrite(debug_file, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

        # also save the ground-truth image for comparison
        db.display(db_ind, os.path.join(debug_dir, "{}_gt.jpg".format(db_ind)),
                   show=False)

    return ret
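The flip remap inside the scale loop (dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]) uses the fact that a box spanning columns (x1, x2) on a horizontally mirrored image of width W spans (W - x2, W - x1) on the original; swapping the two endpoints keeps x1 < x2. A scalar check:

W = 100
x1, x2 = 10.0, 30.0                # box on the flipped image
x1_orig, x2_orig = W - x2, W - x1  # (70.0, 90.0) on the original image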
Example #7
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    result_json = os.path.join(result_dir, "results.json")
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)
    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
    num_images = db_inds.size

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]
    if True:

        top_bboxes = {}
        for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
            db_ind = db_inds[ind]
            image_id = db.image_ids(db_ind)
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            image_xy = np.zeros((image.shape[0], image.shape[1], 2),
                                dtype=np.float32)
            x_mark = np.arange(image.shape[1],
                               dtype=np.float32) / image.shape[1]
            for i in range(image.shape[0]):
                image_xy[i, :, 0] = x_mark
            y_mark = np.arange(image.shape[0],
                               dtype=np.float32) / image.shape[0]
            for i in range(image.shape[1]):
                image_xy[:, i, 1] = y_mark
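            # (equivalent vectorized form: image_xy[..., 0] = x_mark[None, :]
            #  and image_xy[..., 1] = y_mark[:, None])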
            height, width = image.shape[0:2]

            detections = []

            for scale in scales:
                new_height = int(height * scale)
                new_width = int(width * scale)
                new_center = np.array([new_height // 2, new_width // 2])

                inp_height = new_height | 127
                inp_width = new_width | 127
                images = np.zeros((1, 5, inp_height, inp_width),
                                  dtype=np.float32)
                ratios = np.zeros((1, 2), dtype=np.float32)
                borders = np.zeros((1, 4), dtype=np.float32)
                sizes = np.zeros((1, 2), dtype=np.float32)

                out_height, out_width = (inp_height + 1) // 4, (inp_width +
                                                                1) // 4
                height_ratio = out_height / inp_height
                width_ratio = out_width / inp_width
                resized_image = cv2.resize(image, (new_width, new_height))
                resized_image_xy = cv2.resize(image_xy,
                                              (new_width, new_height))
                resized_image, border, offset = crop_image(
                    resized_image, new_center, [inp_height, inp_width])
                resized_image_xy, border, offset = crop_image(
                    resized_image_xy, new_center, [inp_height, inp_width])
                resized_image = resized_image / 255.
                normalize_(resized_image, db.mean, db.std)

                images[0, 0:3] = resized_image.transpose((2, 0, 1))
                images[0, 3:5] = resized_image_xy.transpose((2, 0, 1))
                borders[0] = border
                sizes[0] = [int(height * scale), int(width * scale)]
                ratios[0] = [height_ratio, width_ratio]

                images = torch.from_numpy(images)
                dets, dets_tl, dets_br, flag = decode_func(
                    nnet,
                    images,
                    K,
                    ae_threshold=ae_threshold,
                    kernel=nms_kernel)
                if not flag:
                    print("error when try to test %s" % image_file)
                    continue
                dets = dets.reshape(1, -1, 8)

                _rescale_dets(dets, ratios, borders, sizes)
                dets[:, :, 0:4] /= scale
                detections.append(dets)
            if len(detections) == 0:
                continue
            detections = np.concatenate(detections, axis=1)

            classes = detections[..., -1]
            classes = classes[0]
            detections = detections[0]

            # reject detections with negative scores
            keep_inds = (detections[:, 4] > -1)
            detections = detections[keep_inds]
            classes = classes[keep_inds]

            top_bboxes[image_id] = {}
            for j in range(categories):
                keep_inds = (classes == j)
                top_bboxes[image_id][j +
                                     1] = detections[keep_inds][:, 0:7].astype(
                                         np.float32)
                if merge_bbox:
                    nms.soft_nms_merge(top_bboxes[image_id][j + 1],
                                       Nt=nms_threshold,
                                       method=nms_algorithm,
                                       weight_exp=weight_exp)
                else:
                    nms.soft_nms(top_bboxes[image_id][j + 1],
                                 Nt=nms_threshold,
                                 method=nms_algorithm)
                top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:,
                                                                          0:5]

            scores = np.hstack([
                top_bboxes[image_id][j][:, -1]
                for j in range(1, categories + 1)
            ])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, categories + 1):
                    keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                    top_bboxes[image_id][j] = top_bboxes[image_id][j][
                        keep_inds]

            if debug:
                image_file = db.image_file(db_ind)
                image = cv2.imread(image_file)

                bboxes = {}
                for j in range(1, categories + 1):
                    keep_inds = (top_bboxes[image_id][j][:, -1] > 0.5)
                    cat_name = db.class_name(j)
                    cat_size = cv2.getTextSize(cat_name,
                                               cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                               2)[0]
                    color = np.random.random((3, )) * 0.6 + 0.4
                    color = color * 255
                    color = color.astype(np.int32).tolist()
                    for bbox in top_bboxes[image_id][j][keep_inds]:
                        bbox = bbox[0:4].astype(np.int32)
                        if bbox[1] - cat_size[1] - 2 < 0:
                            cv2.rectangle(image, (bbox[0], bbox[1] + 2),
                                          (bbox[0] + cat_size[0],
                                           bbox[1] + cat_size[1] + 2), color,
                                          -1)
                            cv2.putText(image,
                                        cat_name,
                                        (bbox[0], bbox[1] + cat_size[1] + 2),
                                        cv2.FONT_HERSHEY_SIMPLEX,
                                        0.5, (0, 0, 0),
                                        thickness=1)
                        else:
                            cv2.rectangle(image,
                                          (bbox[0], bbox[1] - cat_size[1] - 2),
                                          (bbox[0] + cat_size[0], bbox[1] - 2),
                                          color, -1)
                            cv2.putText(image,
                                        cat_name, (bbox[0], bbox[1] - 2),
                                        cv2.FONT_HERSHEY_SIMPLEX,
                                        0.5, (0, 0, 0),
                                        thickness=1)
                        cv2.rectangle(image, (bbox[0], bbox[1]),
                                      (bbox[2], bbox[3]), color, 2)
                debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind))
                cv2.imwrite(debug_file, image)

        detections = db.convert_to_coco(top_bboxes)
        with open(result_json, "w") as f:
            json.dump(detections, f)

    image_ids = [db.image_ids(ind) for ind in db_inds]
    with open(result_json, "r") as f:
        result_json = json.load(f)
    for cls_type in range(1, categories + 1):
        db.evaluate(result_json, [cls_type], image_ids)
    return 0
Example #8
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
    num_images = db_inds.size

    K = db.configs["top_k"]
    aggr_weight = db.configs["aggr_weight"]
    scores_thresh = db.configs["scores_thresh"]
    center_thresh = db.configs["center_thresh"]
    suppres_ghost = db.configs["suppres_ghost"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]

    cluster_radius = db.configs["cluster_radius"]

    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}

    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]

        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        height, width = image.shape[0:2]

        detections = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets = decode_func(nnet,
                               images,
                               K,
                               aggr_weight=aggr_weight,
                               scores_thresh=scores_thresh,
                               center_thresh=center_thresh,
                               kernel=nms_kernel,
                               debug=debug)
            dets = dets.reshape(2, -1, 14)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets[1, :, [5, 7, 9, 11]] = out_width - dets[1, :, [5, 7, 9, 11]]
            dets[1, :, [7, 8, 11, 12]] = dets[1, :, [11, 12, 7, 8]].copy()
            dets = dets.reshape(1, -1, 14)

            _rescale_dets(dets, ratios, borders, sizes)
            _rescale_ex_pts(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            dets[:, :, 5:13] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]

        keep_inds = (detections[:, 4] > 0)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):

            keep_inds = (classes == j)

            top_bboxes[image_id][j + 1] = detections[keep_inds].astype(
                np.float32)

            soft_nms(top_bboxes[image_id][j + 1],
                     Nt=nms_threshold,
                     method=nms_algorithm)

        scores = np.hstack(
            [top_bboxes[image_id][j][:, 4] for j in range(1, categories + 1)])

        if len(scores) > max_per_image:

            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, 4] >= thresh)

                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        for j in range(1, categories + 1):
            keep = []
            i = 0
            for bbox in top_bboxes[image_id][j]:

                sc = bbox[4]
                ex = bbox[5:13].astype(np.int32).reshape(4, 2)
                feature_val = feature(ex)
                if feature_val > cluster_radius:
                    keep.append(i)
                i = i + 1
            top_bboxes[image_id][j] = np.delete(top_bboxes[image_id][j],
                                                keep,
                                                axis=0)

        if suppres_ghost:

            for j in range(1, categories + 1):
                n = len(top_bboxes[image_id][j])
                for k in range(n):
                    inside_score = 0
                    if top_bboxes[image_id][j][k, 4] > 0.2:
                        for t in range(n):
                            if _box_inside(top_bboxes[image_id][j][t],
                                           top_bboxes[image_id][j][k]):
                                inside_score += top_bboxes[image_id][j][t, 4]
                        if inside_score > top_bboxes[image_id][j][k, 4] * 3:
                            top_bboxes[image_id][j][k, 4] /= 2

        if debug:

            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)

            bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, 4] > 0.3)
                cat_name = db.class_name(j)
                cat_size = cv2.getTextSize(cat_name + '0',
                                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                color = np.random.random((3, )) * 0.6 + 0.4
                color = color * 255
                color = color.astype(np.int32).tolist()
                for bbox in top_bboxes[image_id][j][keep_inds]:

                    sc = bbox[4]
                    bbox = bbox[0:4].astype(np.int32)
                    txt = '{}{:.0f}'.format(cat_name, sc * 10)
                    if bbox[1] - cat_size[1] - 2 < 0:
                        cv2.rectangle(
                            image, (bbox[0], bbox[1] + 2),
                            (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
                            color, -1)
                        cv2.putText(image,
                                    txt, (bbox[0], bbox[1] + cat_size[1] + 2),
                                    cv2.FONT_HERSHEY_SIMPLEX,
                                    0.5, (0, 0, 0),
                                    thickness=1,
                                    lineType=cv2.LINE_AA)
                    else:
                        cv2.rectangle(image,
                                      (bbox[0], bbox[1] - cat_size[1] - 2),
                                      (bbox[0] + cat_size[0], bbox[1] - 2),
                                      color, -1)
                        cv2.putText(image,
                                    txt, (bbox[0], bbox[1] - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX,
                                    0.5, (0, 0, 0),
                                    thickness=1,
                                    lineType=cv2.LINE_AA)
                    cv2.rectangle(image, (bbox[0], bbox[1]),
                                  (bbox[2], bbox[3]), color, 2)
            debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            cv2.imwrite(debug_file, image)
            cv2.imshow('out', image)
            cv2.waitKey()

    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)
    return 0
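Both kp_detection variants cap the number of detections per image by finding the max_per_image-th highest score with np.partition, which runs in linear time instead of a full sort. A toy illustration:

import numpy as np

scores = np.array([0.9, 0.1, 0.8, 0.3, 0.7])
max_per_image = 3
kth = len(scores) - max_per_image
thresh = np.partition(scores, kth)[kth]  # 0.7, the 3rd-highest score
print(scores[scores >= thresh])          # [0.9 0.8 0.7]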
Example #9
def kp_detection(db, nnet, image_root, debug=False, evaluator=None):
    input_size = db.configs["input_size"]  # [h w]
    image_dir = os.path.join(image_root, "images")
    result_dir = os.path.join(image_root, "detections")
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    image_names = os.listdir(image_dir)
    num_images = len(image_names)

    postprocessors = {'bbox': PostProcess()}

    for ind in tqdm(range(0, num_images), ncols=67, desc="locating kps"):
        image_file = os.path.join(image_dir, image_names[ind])
        image = cv2.imread(image_file)
        height, width = image.shape[0:2]

        images = np.zeros((1, 3, input_size[0], input_size[1]),
                          dtype=np.float32)
        masks = np.ones((1, 1, input_size[0], input_size[1]), dtype=np.float32)
        orig_target_sizes = torch.tensor(input_size).unsqueeze(0).cuda()
        pad_image = image.copy()
        pad_mask = np.zeros((height, width, 1), dtype=np.float32)
        resized_image = cv2.resize(pad_image, (input_size[1], input_size[0]))
        resized_mask = cv2.resize(pad_mask, (input_size[1], input_size[0]))
        masks[0][0] = resized_mask.squeeze()
        resized_image = resized_image / 255.
        normalize_(resized_image, db.mean, db.std)
        resized_image = resized_image.transpose(2, 0, 1)
        images[0] = resized_image
        images = torch.from_numpy(images).cuda(non_blocking=True)
        masks = torch.from_numpy(masks).cuda(non_blocking=True)
        torch.cuda.synchronize(0)  # 0 is the GPU id
        t0 = time.time()
        outputs, weights = nnet.test([images, masks])
        torch.cuda.synchronize(0)  # 0 is the GPU id
        t = time.time() - t0
        results = postprocessors['bbox'](outputs, orig_target_sizes)
        if evaluator is not None:
            evaluator.add_prediction(ind, results.cpu().numpy(), t)

        if debug:
            pred = results[0].cpu().numpy()
            img = pad_image
            img_h, img_w, _ = img.shape
            pred = pred[pred[:, 0].astype(int) == 1]
            overlay = img.copy()
            color = (0, 255, 0)
            for i, lane in enumerate(pred):
                lane = lane[1:]  # remove conf
                lower, upper = lane[0], lane[1]
                lane = lane[2:]  # remove upper, lower positions

                # generate points from the polynomial
                ys = np.linspace(lower, upper, num=100)
                points = np.zeros((len(ys), 2), dtype=np.int32)
                points[:, 1] = (ys * img_h).astype(int)
                points[:, 0] = (
                    (lane[0] / (ys - lane[1])**2 + lane[2] /
                     (ys - lane[1]) + lane[3] + lane[4] * ys - lane[5]) *
                    img_w).astype(int)
                points = points[(points[:, 0] > 0) & (points[:, 0] < img_w)]

                # draw lane with a polyline on the overlay
                for current_point, next_point in zip(points[:-1], points[1:]):
                    overlay = cv2.line(overlay,
                                       tuple(current_point),
                                       tuple(next_point),
                                       color=color,
                                       thickness=15)

                # draw lane ID
                if len(points) > 0:
                    cv2.putText(img,
                                str(i),
                                tuple(points[0]),
                                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                fontScale=1,
                                color=color,
                                thickness=3)
            # Add lanes overlay
            w = 0.6
            img = ((1. - w) * img + w * overlay).astype(np.uint8)

            cv2.imwrite(
                os.path.join(result_dir, image_names[ind][:-4] + '.jpg'), img)

    return 0
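The final composite ((1. - w) * img + w * overlay) is a plain alpha blend; cv2.addWeighted computes the same thing in a single call (sketch with dummy images):

import numpy as np
import cv2

img = np.zeros((4, 4, 3), np.uint8)
overlay = np.full((4, 4, 3), 255, np.uint8)
w = 0.6
blended = cv2.addWeighted(img, 1.0 - w, overlay, w, 0.0)  # == ((1 - w) * img + w * overlay)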
Example #10
def inference(db, nnet, image, decode_func=kp_decode):
    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    height, width = image.shape[0:2]
    detections, center_points = [], []

    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        inp_height = new_height | 127
        inp_width = new_width | 127

        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)

        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width

        resized_image = cv2.resize(image, (new_width, new_height))
        resized_image, border, offset = crop_image(resized_image, new_center,
                                                   [inp_height, inp_width])

        resized_image = resized_image / 255.
        normalize_(resized_image, db.mean, db.std)

        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        sizes[0] = [int(height * scale), int(width * scale)]
        ratios[0] = [height_ratio, width_ratio]

        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        dets, center = decode_func(nnet,
                                   images,
                                   K,
                                   ae_threshold=ae_threshold,
                                   kernel=nms_kernel)
        dets = dets.reshape(2, -1,
                            8)  # bboxes, scores, tl_scores, br_scores, clses
        center = center.reshape(2, -1, 4)  # ct_xs, ct_ys, ct_clses, ct_scores
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]  # flip
        center[1, :, [0]] = out_width - center[1, :, [0]]  # horizontal flip
        dets = dets.reshape(1, -1, 8)
        center = center.reshape(1, -1, 4)

        _rescale_dets(dets, ratios, borders, sizes)
        center[..., [0]] /= ratios[:, 1][:, None,
                                         None]  # remap to origin image
        center[..., [1]] /= ratios[:, 0][:, None, None]
        center[..., [0]] -= borders[:, 2][:, None, None]
        center[..., [1]] -= borders[:, 0][:, None, None]
        np.clip(center[..., [0]],
                0,
                sizes[:, 1][:, None, None],
                out=center[..., [0]])
        np.clip(center[..., [1]],
                0,
                sizes[:, 0][:, None, None],
                out=center[..., [1]])
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale  # remap to origin image

        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    detections = np.concatenate(detections, axis=1)
    center_points = np.concatenate(center_points, axis=1)

    classes = detections[..., -1]
    classes = classes[0]
    detections = detections[0]
    center_points = center_points[0]

    valid_ind = detections[:, 4] > -1
    valid_detections = detections[valid_ind]

    box_width = valid_detections[:, 2] - valid_detections[:, 0]
    box_height = valid_detections[:, 3] - valid_detections[:, 1]

    s_ind = (box_width * box_height <= 22500)
    l_ind = (box_width * box_height > 22500)

    s_detections = valid_detections[s_ind]
    l_detections = valid_detections[l_ind]
    # trisection
    s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3  # x + (y-x)/3
    s_right_x = (s_detections[:, 0] +
                 2 * s_detections[:, 2]) / 3  # x +2(y-x)/3
    s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
    s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

    s_temp_score = copy.copy(s_detections[:, 4])
    s_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    s_left_x = s_left_x[np.newaxis, :]
    s_right_x = s_right_x[np.newaxis, :]
    s_top_y = s_top_y[np.newaxis, :]
    s_bottom_y = s_bottom_y[np.newaxis, :]
    # located in center region
    ind_lx = (center_x - s_left_x) > 0
    ind_rx = (center_x - s_right_x) < 0
    ind_ty = (center_y - s_top_y) > 0
    ind_by = (center_y - s_bottom_y) < 0
    # same classes
    ind_cls = (center_points[:, 2][:, np.newaxis] -
               s_detections[:, -1][np.newaxis, :]) == 0
    ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)),
                             axis=0) == 1
    index_s_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_s_new_score],
        axis=0)  # select the box having center located in the center region
    s_detections[:, 4][ind_s_new_score] = (
        s_temp_score[ind_s_new_score] * 2 +
        center_points[index_s_new_score, 3]) / 3

    l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
    l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
    l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
    l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

    l_temp_score = copy.copy(l_detections[:, 4])
    l_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    l_left_x = l_left_x[np.newaxis, :]
    l_right_x = l_right_x[np.newaxis, :]
    l_top_y = l_top_y[np.newaxis, :]
    l_bottom_y = l_bottom_y[np.newaxis, :]

    ind_lx = (center_x - l_left_x) > 0
    ind_rx = (center_x - l_right_x) < 0
    ind_ty = (center_y - l_top_y) > 0
    ind_by = (center_y - l_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] -
               l_detections[:, -1][np.newaxis, :]) == 0
    ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)),
                             axis=0) == 1
    index_l_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_l_new_score],
        axis=0)
    l_detections[:, 4][ind_l_new_score] = (
        l_temp_score[ind_l_new_score] * 2 +
        center_points[index_l_new_score, 3]) / 3

    detections = np.concatenate([l_detections, s_detections], axis=0)
    detections = detections[np.argsort(
        -detections[:, 4])]  # resort according to new scores
    classes = detections[..., -1]

    # reject detections with negative scores
    keep_inds = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    # soft_nms
    top_bboxes = {}
    for j in range(categories):
        keep_inds = (classes == j)
        top_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(top_bboxes[j + 1],
                           Nt=nms_threshold,
                           method=nms_algorithm,
                           weight_exp=weight_exp)
        else:
            soft_nms(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm)
        top_bboxes[j + 1] = top_bboxes[j + 1][:, 0:5]

    scores = np.hstack(
        [top_bboxes[j][:, -1] for j in range(1, categories + 1)])
    # select boxes
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (top_bboxes[j][:, -1] >= thresh)
            top_bboxes[j] = top_bboxes[j][keep_inds]

    return top_bboxes
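The small-box branch above rescores a detection only when a same-class center point falls inside the central third of the box, in which case the new score is (2 * box_score + center_score) / 3; large boxes use the central fifth instead. A scalar version of the small-box check:

x1, y1, x2, y2, score = 0.0, 0.0, 90.0, 90.0, 0.6
left, right = (2 * x1 + x2) / 3, (x1 + 2 * x2) / 3  # central third: [30, 60]
top, bottom = (2 * y1 + y2) / 3, (y1 + 2 * y2) / 3
cx, cy, center_score = 45.0, 45.0, 0.9              # candidate center point
if left < cx < right and top < cy < bottom:
    score = (2 * score + center_score) / 3          # 0.7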
Example #11
def kp_detection(db, k_ind):
    data_rng     = system_configs.data_rng
    batch_size   = system_configs.batch_size
    input_size   = db.configs["input_size"]
    lighting     = db.configs["lighting"]
    rand_color   = db.configs["rand_color"]
    images   = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32) # b, 3, H, W
    masks    = np.zeros((batch_size, 1, input_size[0], input_size[1]), dtype=np.float32)  # b, 1, H, W
    gt_lanes = []

    db_size = db.db_inds.size # 3268 | 2782

    for b_ind in range(batch_size):

        if k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind  = (k_ind + 1) % db_size

        # reading ground truth
        item  = db.detections(db_ind) # all in the raw coordinate
        img   = cv2.imread(item['path'])
        mask  = np.ones((1, img.shape[0], img.shape[1], 1), dtype=bool)
        label = item['label']
        transform = True
        if transform:
            line_strings = db.lane_to_linestrings(item['old_anno']['lanes'])
            line_strings = LineStringsOnImage(line_strings, shape=img.shape)
            img, line_strings, mask = db.transform(image=img, line_strings=line_strings, segmentation_maps=mask)
            line_strings.clip_out_of_image_()
            new_anno = {'path': item['path'], 'lanes': db.linestrings_to_lanes(line_strings)}
            new_anno['categories'] = item['categories']
            label = db._transform_annotation(new_anno, img_wh=(input_size[1], input_size[0]))['label']

        # clip polys
        tgt_ids   = label[:, 0]
        label = label[tgt_ids > 0]

        # make lower the same
        label[:, 1][label[:, 1] < 0] = 1
        label[:, 1][...] = np.min(label[:, 1])

        label = np.stack([label] * batch_size, axis=0)
        gt_lanes.append(torch.from_numpy(label.astype(np.float32)))

        img = (img / 255.).astype(np.float32)
        if rand_color:
            color_jittering_(data_rng, img)
            if lighting:
                lighting_(data_rng, img, 0.1, db.eig_val, db.eig_vec)
        normalize_(img, db.mean, db.std)
        images[b_ind]   = img.transpose((2, 0, 1))
        masks[b_ind]    = np.logical_not(mask[:, :, :, 0])

    images   = torch.from_numpy(images)
    masks    = torch.from_numpy(masks)

    return {
               "xs": [images, masks],
               "ys": [images, *gt_lanes]
           }, k_ind
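The masks tensor here marks padding rather than content: mask comes back from the augmentation pipeline as a validity map, so its logical_not is 1 exactly where pixels are padded or invalid, matching the transformer-detector convention (as in DETR) of masking attention over padded regions. Minimal illustration:

import numpy as np

valid = np.array([[True, True, False]])  # True where real image content exists
pad_mask = np.logical_not(valid)         # True where padding -> masked in attention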
Example #12
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    NT = 20  # NT: number of test images
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)             # create the directory
    if db.split != "trainval":
        db_inds = db.db_inds[:NT] if debug else db.db_inds         # outside debug mode, detect every image in the split
    else:
        db_inds = db.db_inds[:NT] if debug else db.db_inds[:5000]  # in debug mode, only NT images are used
    num_images = db_inds.size   # number of images to run detection on
    K = db.configs["top_k"]     # detections kept per image
    ae_threshold = db.configs["ae_threshold"]  # corner-grouping (associative embedding) threshold
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]
    top_bboxes = {}   # records the top-k detection boxes
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        # load the image
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        height, width = image.shape[0:2]
        # accumulate detections and center points
        detections = []
        center_points = []
        for scale in scales:
            # preprocessing steps at the current scale
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])
            inp_height = new_height | 127  # pad to avoid exceeding the border
            inp_width = new_width | 127    # pad to avoid exceeding the border
            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)
            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width
            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)
            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            # run the detection decode function
            dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)
            _rescale_dets(dets, ratios, borders, sizes)
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]])
            np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale
            if scale == 1:
                center_points.append(center)   # only record center points at the original image scale
            detections.append(dets)            # detections at this scale
        # consolidate the detections for the current image
        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)
        classes = detections[..., -1]          # class information of detections
        classes = classes[0]                   # classes
        detections = detections[0]
        center_points = center_points[0]
        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]
        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]
        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)
        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]
        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3
        s_temp_score = copy.copy(s_detections[:, 4])     # score of each bbox
        s_detections[:, 4] = -1
        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0
        ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_s_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], axis=0)
        s_detections[:, 4][ind_s_new_score] = (s_temp_score[ind_s_new_score] * 2 + center_points[
            index_s_new_score, 3]) / 3
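        # Central-region check for small boxes: a detection keeps a valid score
        # only if a same-class center point falls inside the central third of
        # the box (between s_left_x/s_right_x and s_top_y/s_bottom_y); the
        # refined score averages the box score (weighted 2x) with the matched
        # center-point score.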

        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0
        ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_l_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0)
        l_detections[:, 4][ind_l_new_score] = (l_temp_score[ind_l_new_score] * 2 + center_points[
            index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]
        # NMS post-processing
        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm,
                               weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack([
            top_bboxes[image_id][j][:, -1]
            for j in range(1, categories + 1)
        ])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]
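        # np.partition puts the kth-smallest score at index kth, so thresh is
        # the score of the max_per_image-th best detection; keeping scores
        # >= thresh caps each image at (roughly, modulo ties) max_per_image
        # detections.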
        # debug mode
        if debug:
            image_file = db.image_file(db_ind)
            _, filename0 = os.path.split(image_file)      # split off the file name
            img_name0, _ = os.path.splitext(filename0)    # file name without its extension
            FileTXT = open(debug_dir + "/" + img_name0 + ".txt", mode="a")   # file handle for logging detection box positions
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]
            fig, ax = plt.subplots(figsize=(12, 12))
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)
            # bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= 0.4)
                cat_name = db.class_name(j)
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    bbox = bbox[0:4].astype(np.int32)
                    xmin = bbox[0]
                    ymin = bbox[1]
                    xmax = bbox[2]
                    ymax = bbox[3]
                    FileTXT.write(str(1) + ' ' + str(int(xmin)) + ' ' + str(int(ymin))
                                  + ' ' + str(int(xmax)) + ' ' + str(int(ymax)) + ' ' + str(1))
                    FileTXT.write('\n')   # bbox position and size info
                    # draw the box
                    ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=colours[j - 1],
                                      linewidth=4.0))
                    ax.text(xmin + 1, ymin - 3, '{:s}'.format(cat_name),
                            bbox=dict(facecolor=colours[j - 1], ec='black', lw=2, alpha=0.5),
                            fontsize=15, color='white', weight='bold')

            FileTXT.close()
            # debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))   # for generating a PDF version
            debug_file2 = os.path.join(debug_dir, "{}.jpg".format(img_name0))  # JPEG format
            # plt.savefig(debug_file1)
            plt.savefig(debug_file2)  # save the image
            plt.close()
    result_json = os.path.join(result_dir, "results.json")   # store detection results as JSON
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)
    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)   # evaluate
    return 0
Exemple #13
0
def kp_detection(db, nnet, result_dir, debug=True, decode_func=kp_decode):

    db_inds = db.db_inds[:10] if debug else db.db_inds
    num_images = db_inds.size

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}

    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # Paths
        result_path = result_dir + "/{}".format(image_id[:-4])
        result_json = os.path.join(result_path, "results.json")
        result_debug = os.path.join(result_path, "{}.jpg".format(db_ind))

        if pexists(result_json):
            continue

        # Create dirs
        Path(result_path).mkdir(parents=True, exist_ok=True)

        height, width = image.shape[0:2]

        detections = []
        center_points = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets, center = decode_func(nnet,
                                       images,
                                       K,
                                       ae_threshold=ae_threshold,
                                       kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)

            _rescale_dets(dets, ratios, borders, sizes)
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]],
                    0,
                    sizes[:, 1][:, None, None],
                    out=center[..., [0]])
            np.clip(center[..., [1]],
                    0,
                    sizes[:, 0][:, None, None],
                    out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale

            if scale == 1:
                center_points.append(center)
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]

        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]

        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)

        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

        s_temp_score = copy.copy(s_detections[:, 4])
        s_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   s_detections[:, -1][np.newaxis, :]) == 0
        ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_s_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_s_new_score],
            axis=0)
        s_detections[:, 4][ind_s_new_score] = (
            s_temp_score[ind_s_new_score] * 2 +
            center_points[index_s_new_score, 3]) / 3

        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   l_detections[:, -1][np.newaxis, :]) == 0
        ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_l_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_l_new_score],
            axis=0)
        l_detections[:, 4][ind_l_new_score] = (
            l_temp_score[ind_l_new_score] * 2 +
            center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]

        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(
                np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1],
                               Nt=nms_threshold,
                               method=nms_algorithm,
                               weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1],
                         Nt=nms_threshold,
                         method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack(
            [top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        detections = db.parse_detections(top_bboxes[image_id])

        # if no valid detections
        if len(detections) == 0:
            # shutil.rmtree(Path(result_dir + "/{}".format(image_id[:-4])))
            continue
        else:
            # Save JSON
            with open(result_json, "w") as f:
                json.dump(detections, f)

        # Save also images with labels
        if debug:
            # Get image
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]

            # Create matplotlib fig
            fig, ax = plt.subplots(figsize=(12, 12))
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            for x in detections:
                bbox = x["bbox"]

                # Get points from width and height
                bbox[2] += bbox[0]
                bbox[3] += bbox[1]

                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]

                ax.add_patch(
                    plt.Rectangle((xmin, ymin),
                                  xmax - xmin,
                                  ymax - ymin,
                                  fill=False,
                                  edgecolor=colours[j - 1],
                                  linewidth=4.0))
                ax.text(xmin + 1,
                        ymin - 3,
                        '{:s}'.format(x["category_id"]),
                        bbox=dict(facecolor=colours[j - 1],
                                  ec='black',
                                  lw=2,
                                  alpha=0.5),
                        fontsize=15,
                        color='white',
                        weight='bold')

            plt.savefig(result_debug)
            plt.close()

    return 0
Exemple #14
0
def kp_detection(db, nnet, result_dir, debug=False, evaluator=None, repeat=1,
                 isEncAttn=False, isDecAttn=False):
    if db.split != "train":
        db_inds = db.db_inds if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds
    num_images = db_inds.size

    multi_scales = db.configs["test_scales"]

    input_size  = db.configs["input_size"]  # [h w]

    postprocessors = {'curves': PostProcess()}

    for ind in tqdm(range(0, num_images), ncols=67, desc="locating kps"):
        db_ind        = db_inds[ind]
        # image_id      = db.image_ids(db_ind)
        image_file    = db.image_file(db_ind)
        image         = cv2.imread(image_file)
        raw_img = image.copy()
        raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)

        height, width = image.shape[0:2]
        # item  = db.detections(db_ind) # all in the raw coordinate

        for scale in multi_scales:
            images = np.zeros((1, 3, input_size[0], input_size[1]), dtype=np.float32)
            masks = np.ones((1, 1, input_size[0], input_size[1]), dtype=np.float32)
            orig_target_sizes = torch.tensor(input_size).unsqueeze(0).cuda()
            pad_image     = image.copy()
            pad_mask      = np.zeros((height, width, 1), dtype=np.float32)
            resized_image = cv2.resize(pad_image, (input_size[1], input_size[0]))
            resized_mask  = cv2.resize(pad_mask, (input_size[1], input_size[0]))
            masks[0][0]   = resized_mask.squeeze()
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)
            resized_image = resized_image.transpose(2, 0, 1)
            images[0]     = resized_image
            images        = torch.from_numpy(images).cuda(non_blocking=True)
            masks         = torch.from_numpy(masks).cuda(non_blocking=True)

            # seeking better FPS performance
            images = images.repeat(repeat, 1, 1, 1).cuda(non_blocking=True)
            masks  = masks.repeat(repeat, 1, 1, 1).cuda(non_blocking=True)
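            # Tiling the same input `repeat` times lets the timing below report
            # an amortized per-image latency (t / repeat), smoothing per-call
            # overheads when measuring FPS.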

            # the code below is used for drawing attention maps
            conv_features, enc_attn_weights, dec_attn_weights = [], [], []
            if isDecAttn or isEncAttn:
                hooks = [
                    nnet.model.module.layer4[-1].register_forward_hook(
                        lambda self, input, output: conv_features.append(output)),
                    nnet.model.module.transformer.encoder.layers[-1].self_attn.register_forward_hook(
                        lambda self, input, output: enc_attn_weights.append(output[1])),
                    nnet.model.module.transformer.decoder.layers[-1].multihead_attn.register_forward_hook(
                        lambda self, input, output: dec_attn_weights.append(output[1]))
                ]
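                # These forward hooks capture, from a single test pass, the
                # backbone feature map and the attention weights of the last
                # encoder/decoder layers; they are removed again right after
                # nnet.test() below.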

            torch.cuda.synchronize(0)  # 0 is the GPU id
            t0            = time.time()
            outputs, weights = nnet.test([images, masks])
            torch.cuda.synchronize(0)  # 0 is the GPU id
            t             = time.time() - t0

            # the code below is used for drawing attention maps
            if isDecAttn or isEncAttn:
                for hook in hooks:
                    hook.remove()
                conv_features = conv_features[0]
                enc_attn_weights = enc_attn_weights[0]
                dec_attn_weights = dec_attn_weights[0]

            results = postprocessors['curves'](outputs, orig_target_sizes)

            if evaluator is not None:
                evaluator.add_prediction(ind, results.cpu().numpy(), t / repeat)

        if debug:
            img_lst = image_file.split('/')
            lane_debug_dir = os.path.join(result_dir, "lane_debug")
            if not os.path.exists(lane_debug_dir):
                os.makedirs(lane_debug_dir)

            # # Draw dec attn
            if isDecAttn:
                h, w = conv_features.shape[-2:]
                keep = results[0, :, 0].cpu() == 1.
                fig, axs = plt.subplots(ncols=keep.nonzero().shape[0] + 1, nrows=2, figsize=(44, 14))
                # print(keep.nonzero().shape[0], image_file)
                # colors = COLORS * 100
                for idx, ax_i in zip(keep.nonzero(), axs.T):
                    ax = ax_i[0]
                    ax.imshow(dec_attn_weights[0, idx].view(h, w).cpu())
                    ax.axis('off')
                    ax.set_title('query id: [{}]'.format(idx))
                    ax = ax_i[1]
                    preds = db.draw_annotation(ind, pred=results[0][idx].cpu().numpy(), cls_pred=None, img=raw_img)
                    ax.imshow(preds)
                    ax.axis('off')
                fig.tight_layout()
                img_path = os.path.join(lane_debug_dir, 'decAttn_{}_{}_{}.jpg'.format(
                    img_lst[-3], img_lst[-2], os.path.basename(image_file[:-4])))
                plt.savefig(img_path)
                plt.close(fig)

            # # Draw enc attn
            if isEncAttn:
                img_dir = os.path.join(lane_debug_dir, '{}_{}_{}'.format(
                    img_lst[-3], img_lst[-2], os.path.basename(image_file[:-4])))
                if not os.path.exists(img_dir):
                    os.makedirs(img_dir)
                f_map = conv_features
                # print('encoder attention: {}'.format(enc_attn_weights[0].shape))
                # print('feature map: {}'.format(f_map.shape))
                shape = f_map.shape[-2:]
                image_height, image_width, _ = raw_img.shape
                sattn = enc_attn_weights[0].reshape(shape + shape).cpu()
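                # enc_attn_weights[0] has shape (h*w, h*w); reshaping to
                # (h, w, h, w) lets the 2D self-attention map associated with a
                # single spatial position be sliced out, which is what
                # sattn[..., int(idx[0]), int(idx[1])] does below.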
                _, label, _ = db.__getitem__(ind)  # 4, 115
                # print(db.max_points)  # 56
                for i, lane in enumerate(label):
                    if lane[0] == 0:  # Skip invalid lanes
                        continue
                    lane = lane[3:]  # remove conf, upper and lower positions
                    xs = lane[:len(lane) // 2]
                    ys = lane[len(lane) // 2:]
                    ys = ys[xs >= 0]
                    xs = xs[xs >= 0]
                    # norm_idxs = zip(ys, xs)
                    idxs      = np.stack([ys * image_height, xs * image_width], axis=-1)
                    attn_idxs = np.stack([ys * shape[0], xs * shape[1]], axis=-1)

                    for idx_o, idx, num in zip(idxs, attn_idxs, range(xs.shape[0])):
                        fig, axs = plt.subplots(ncols=1, nrows=2, figsize=(20, 14))
                        ax_i = axs.T
                        ax = ax_i[0]
                        ax.imshow(sattn[..., int(idx[0]), int(idx[1])], cmap='cividis', interpolation='nearest')
                        ax.axis('off')
                        ax.set_title('{}'.format(idx_o.astype(int)))
                        ax = ax_i[1]
                        ax.imshow(raw_img)
                        ax.add_patch(plt.Circle((int(idx_o[1]), int(idx_o[0])), color='r', radius=16))
                        ax.axis('off')
                        fig.tight_layout()

                        img_path = os.path.join(img_dir, 'encAttn_lane{}_{}_{}.jpg'.format(
                            i, num, idx_o.astype(int)))
                        plt.savefig(img_path)
                        plt.close(fig)

            if not isEncAttn and not isDecAttn:
                preds = db.draw_annotation(ind, pred=results[0].cpu().numpy(), cls_pred=None, img=image)
                cv2.imwrite(os.path.join(lane_debug_dir, img_lst[-3] + '_'
                                         + img_lst[-2] + '_'
                                         + os.path.basename(image_file[:-4]) + '.jpg'), preds)

    if not debug:
        exp_name = 'tusimple'
        evaluator.exp_name = exp_name
        eval_str, _ = evaluator.eval(label='{}'.format(os.path.basename(exp_name)))
        print(eval_str)

    return 0
Exemple #15
0
def test(db, split, testiter, debug=False, suffix=None):
    result_dir = system_configs.result_dir
    result_dir = os.path.join(result_dir, str(testiter), split)
    class_name = []
    for i in range(1, len(db._coco.cats)):
        # if db._coco.cats[i] is None:
        #     continue
        # else:
        ind = db._cat_ids[i]
        class_name.append(db._coco.cats[ind]['name'])
    if suffix is not None:
        result_dir = os.path.join(result_dir, suffix)

    make_dirs([result_dir])

    test_iter = system_configs.max_iter if testiter is None else testiter
    print("loading parameters at iteration: {}".format(test_iter))

    print("building neural network...")
    nnet = NetworkFactory(db)
    print("loading parameters...")
    nnet.load_params(test_iter)

    # test_file = "test.{}".format(db.data)
    # testing = importlib.import_module(test_file).testing

    nnet.cuda()
    nnet.eval_mode()

    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    img_name = os.listdir(db._image_dir)
    for i in range(0, len(img_name)):
        top_bboxes = {}
        # for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = i + 1

        # image_id = db.image_ids(db_ind)
        image_id = img_name[i]
        image_file = db._image_dir + '/' + img_name[i]
        image = cv2.imread(image_file)

        height, width = image.shape[0:2]

        detections = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets = kp_decode(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets = dets.reshape(1, -1, 8)

            _rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]

        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm,
                               weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack([
            top_bboxes[image_id][j][:, -1]
            for j in range(1, categories + 1)
        ])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        # result_json = os.path.join(result_dir, "results.json")
        detections = db.convert_to_list(top_bboxes)
        print('demo for {}'.format(image_id))
        img = cv2.imread(image_file)
        box = []
        if detections is not None:
            for det_ind in range(len(detections)):  # avoid shadowing the outer image index i
                name = db._coco.cats[detections[det_ind][1]]['name']  # db._coco.cats[ind]['name']
                confi = detections[det_ind][-1]
                if confi < 0.3:
                    continue
                for j in range(0, 4):
                    box.append(detections[det_ind][j + 2])
                cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 255), 1)
                # cv2.putText(img, name[0] + '  ' + '{:.3f}'.format(confi), (int(box[0]), int(box[1] - 10)),
                #             cv2.FONT_ITALIC, 1, (0, 0, 255), 1)
                box.clear()
        cv2.imshow('Detecting image...', img)
        # timer.total_time = 0
        if cv2.waitKey(3000) & 0xFF == ord('q'):
            break
        print(detections)
Exemple #16
0
def kp_detection(db, k_ind, data_aug, debug):
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    max_tag_len = 128

    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    tl_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    br_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        #print(image_file)
        image = cv2.imread(image_file)

        # reading detections
        detections = db.detections(db_ind)

        # cropping an image randomly
        if not debug and rand_crop:
            image, detections = random_crop(image,
                                            detections,
                                            rand_scales,
                                            input_size,
                                            border=border)
        else:
            image, detections = _full_image_crop(image, detections)
        #print("Image_size")
        #print(image.shape)
        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        width_ratio = output_size[1] / input_size[1]
        height_ratio = output_size[0] / input_size[0]

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        images[b_ind] = image.transpose((2, 0, 1))

        for ind, detection in enumerate(detections):

            category = int(detection[-1]) - 1
            #print("Category: %d" %category)
            #print("Detections: %d" % len(detections))
            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]

            fxtl = (xtl * width_ratio)
            fytl = (ytl * height_ratio)
            fxbr = (xbr * width_ratio)
            fybr = (ybr * height_ratio)

            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)

            if gaussian_bump:
                width = detection[2] - detection[0]
                height = detection[3] - detection[1]

                width = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad
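                # when gaussian_radius == -1, the radius is derived from the
                # box size so that points within it should still yield at least
                # gaussian_iou overlap with the ground truth; otherwise the
                # fixed configured radius is used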

                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
            else:
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1

            tag_ind = tag_lens[b_ind]
            tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
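            # the regression targets store the fractional part lost when the
            # corner coordinates are quantized to the output grid, i.e. the
            # sub-pixel offsets needed to recover precise corner locations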
            tag_lens[b_ind] += 1
            if tag_lens[b_ind] >= max_tag_len - 1:
                print("Too many targets, skip!")
                print(tag_lens[b_ind])
                print(image_file)
                break
            #print("Pre_tag_ing:%d" %tag_ind)
    for b_ind in range(batch_size):
        tag_len = tag_lens[b_ind]
        tag_masks[b_ind, :tag_len] = 1

    images = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    tl_regrs = torch.from_numpy(tl_regrs)
    br_regrs = torch.from_numpy(br_regrs)
    tag_masks = torch.from_numpy(tag_masks)

    return {
        "xs": [images],
        "ys": [tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs]
    }, k_ind
Exemple #17
0
    def kp_detection(self, image, db, result_dir, debug=False):
        K = db.configs["top_k"]
        ae_threshold = db.configs["ae_threshold"]
        nms_kernel = db.configs["nms_kernel"]
        weight_exp = db.configs["weight_exp"]
        merge_bbox = db.configs["merge_bbox"]
        categories = db.configs["categories"]
        nms_threshold = db.configs["nms_threshold"]
        max_per_image = db.configs["max_per_image"]
        nms_algorithm = {
            "nms": 0,
            "linear_soft_nms": 1,
            "exp_soft_nms": 2
        }[db.configs["nms_algorithm"]]
        top_bboxes = {}
        if True:
            #db_ind = db_inds[ind]
            image_id = 0
            height, width = image.shape[0:2]

            detections = []
            center_points = []

            if True:
                scale = 1
                new_height = int(height * scale)
                new_width = int(width * scale)
                new_center = np.array([new_height // 2, new_width // 2])

                inp_height = new_height | 127
                inp_width = new_width | 127

                images = np.zeros((1, 3, inp_height, inp_width),
                                  dtype=np.float32)
                ratios = np.zeros((1, 2), dtype=np.float32)
                borders = np.zeros((1, 4), dtype=np.float32)
                sizes = np.zeros((1, 2), dtype=np.float32)

                out_height, out_width = (inp_height + 1) // 4, (inp_width +
                                                                1) // 4
                height_ratio = out_height / inp_height
                width_ratio = out_width / inp_width

                resized_image = cv2.resize(image, (new_width, new_height))
                resized_image, border, offset = crop_image(
                    resized_image, new_center, [inp_height, inp_width])

                resized_image = resized_image / 255.
                normalize_(resized_image, db.mean, db.std)

                images[0] = resized_image.transpose((2, 0, 1))
                borders[0] = border
                sizes[0] = [int(height * scale), int(width * scale)]
                ratios[0] = [height_ratio, width_ratio]

                images = np.concatenate((images, images[:, :, :, ::-1]),
                                        axis=0)
                images = torch.from_numpy(images)
                dets, center = self.kp_decode(images,
                                              K,
                                              ae_threshold=ae_threshold,
                                              kernel=nms_kernel)
                dets = dets.reshape(2, -1, 8)
                center = center.reshape(2, -1, 4)
                dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
                center[1, :, [0]] = out_width - center[1, :, [0]]
                dets = dets.reshape(1, -1, 8)
                center = center.reshape(1, -1, 4)

                self._rescale_dets(dets, ratios, borders, sizes)
                center[..., [0]] /= ratios[:, 1][:, None, None]
                center[..., [1]] /= ratios[:, 0][:, None, None]
                center[..., [0]] -= borders[:, 2][:, None, None]
                center[..., [1]] -= borders[:, 0][:, None, None]
                np.clip(center[..., [0]],
                        0,
                        sizes[:, 1][:, None, None],
                        out=center[..., [0]])
                np.clip(center[..., [1]],
                        0,
                        sizes[:, 0][:, None, None],
                        out=center[..., [1]])
                dets[:, :, 0:4] /= scale
                center[:, :, 0:2] /= scale

                if scale == 1:
                    center_points.append(center)
                detections.append(dets)

            detections = np.concatenate(detections, axis=1)
            center_points = np.concatenate(center_points, axis=1)

            classes = detections[..., -1]
            classes = classes[0]
            detections = detections[0]
            center_points = center_points[0]

            valid_ind = detections[:, 4] > -1
            valid_detections = detections[valid_ind]

            box_width = valid_detections[:, 2] - valid_detections[:, 0]
            box_height = valid_detections[:, 3] - valid_detections[:, 1]

            s_ind = (box_width * box_height <= 22500)
            l_ind = (box_width * box_height > 22500)

            s_detections = valid_detections[s_ind]
            l_detections = valid_detections[l_ind]

            s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
            s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
            s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
            s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

            s_temp_score = copy.copy(s_detections[:, 4])
            s_detections[:, 4] = -1

            center_x = center_points[:, 0][:, np.newaxis]
            center_y = center_points[:, 1][:, np.newaxis]
            s_left_x = s_left_x[np.newaxis, :]
            s_right_x = s_right_x[np.newaxis, :]
            s_top_y = s_top_y[np.newaxis, :]
            s_bottom_y = s_bottom_y[np.newaxis, :]

            ind_lx = (center_x - s_left_x) > 0
            ind_rx = (center_x - s_right_x) < 0
            ind_ty = (center_y - s_top_y) > 0
            ind_by = (center_y - s_bottom_y) < 0
            ind_cls = (center_points[:, 2][:, np.newaxis] -
                       s_detections[:, -1][np.newaxis, :]) == 0
            ind_s_new_score = np.max(
                ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
                 (ind_cls + 0)),
                axis=0) == 1
            index_s_new_score = np.argmax(
                ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
                 (ind_cls + 0))[:, ind_s_new_score],
                axis=0)
            s_detections[:, 4][ind_s_new_score] = (
                s_temp_score[ind_s_new_score] * 2 +
                center_points[index_s_new_score, 3]) / 3

            l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
            l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
            l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
            l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

            l_temp_score = copy.copy(l_detections[:, 4])
            l_detections[:, 4] = -1

            center_x = center_points[:, 0][:, np.newaxis]
            center_y = center_points[:, 1][:, np.newaxis]
            l_left_x = l_left_x[np.newaxis, :]
            l_right_x = l_right_x[np.newaxis, :]
            l_top_y = l_top_y[np.newaxis, :]
            l_bottom_y = l_bottom_y[np.newaxis, :]

            ind_lx = (center_x - l_left_x) > 0
            ind_rx = (center_x - l_right_x) < 0
            ind_ty = (center_y - l_top_y) > 0
            ind_by = (center_y - l_bottom_y) < 0
            ind_cls = (center_points[:, 2][:, np.newaxis] -
                       l_detections[:, -1][np.newaxis, :]) == 0
            ind_l_new_score = np.max(
                ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
                 (ind_cls + 0)),
                axis=0) == 1
            index_l_new_score = np.argmax(
                ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
                 (ind_cls + 0))[:, ind_l_new_score],
                axis=0)
            l_detections[:, 4][ind_l_new_score] = (
                l_temp_score[ind_l_new_score] * 2 +
                center_points[index_l_new_score, 3]) / 3

            detections = np.concatenate([l_detections, s_detections], axis=0)
            detections = detections[np.argsort(-detections[:, 4])]
            classes = detections[..., -1]

            keep_inds = (detections[:, 4] > -1)
            detections = detections[keep_inds]
            classes = classes[keep_inds]

            top_bboxes[image_id] = {}
            for j in range(categories):
                keep_inds = (classes == j)
                top_bboxes[image_id][j +
                                     1] = detections[keep_inds][:, 0:7].astype(
                                         np.float32)
                if merge_bbox:
                    soft_nms_merge(top_bboxes[image_id][j + 1],
                                   Nt=nms_threshold,
                                   method=nms_algorithm,
                                   weight_exp=weight_exp)
                else:
                    soft_nms(top_bboxes[image_id][j + 1],
                             Nt=nms_threshold,
                             method=nms_algorithm)
                top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:,
                                                                          0:5]

            scores = np.hstack([
                top_bboxes[image_id][j][:, -1]
                for j in range(1, categories + 1)
            ])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, categories + 1):
                    keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                    top_bboxes[image_id][j] = top_bboxes[image_id][j][
                        keep_inds]

            return top_bboxes[image_id]

        return 0
Exemple #18
0
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    point_json_tl = os.path.join(result_dir, "points_tl.json")
    point_json_br = os.path.join(result_dir, "points_br.json")
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)
    db_inds = db.db_inds
    num_images = db_inds.size

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]
    time_backbones = 0
    time_psns = 0
    if True:

        top_points_tl = {}
        top_points_br = {}
        for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
            db_ind = db_inds[ind]
            image_id = db.image_ids(db_ind)
            image_file = db.image_file(db_ind)
            #print(image_file)
            image = cv2.imread(image_file)

            height, width = image.shape[0:2]

            detections_point_tl = []
            detections_point_br = []
            scale = 1.0
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127
            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = torch.from_numpy(images)
            dets_tl, dets_br, time_backbone, time_psn, flag = decode_func(
                nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
            time_backbones += time_backbone
            time_psns += time_psn
            #print('b time:%f' % (time_backbones / float(ind + 1)))
            #print('p time:%f' % (time_psns / float(ind + 1)))
            #print(0)
            #print(dets_tl)
            if not flag:
                print("error when try to test %s" % image_file)
                continue
            #print(dets_tl.shape)
            _rescale_points(dets_tl, ratios, borders, sizes)
            _rescale_points(dets_br, ratios, borders, sizes)
            detections_point_tl.append(dets_tl)
            detections_point_br.append(dets_br)
            detections_point_tl = np.concatenate(detections_point_tl, axis=1)
            detections_point_br = np.concatenate(detections_point_br, axis=1)
            #print('1')
            #print(detections_point.shape)

            classes_p_tl = detections_point_tl[:, 0, 1]
            classes_p_br = detections_point_br[:, 0, 1]
            #print('2')
            #print(classes_p.shape)

            # reject detections with negative scores

            keep_inds_p = (detections_point_tl[:, 0, 0] > 0)
            detections_point_tl = detections_point_tl[keep_inds_p, 0]
            classes_p_tl = classes_p_tl[keep_inds_p]

            keep_inds_p = (detections_point_br[:, 0, 0] > 0)
            detections_point_br = detections_point_br[keep_inds_p, 0]
            classes_p_br = classes_p_br[keep_inds_p]

            #print('3')
            #print(detections_point.shape)

            top_points_tl[image_id] = {}
            top_points_br[image_id] = {}
            for j in range(categories):

                keep_inds_p = (classes_p_tl == j)
                top_points_tl[image_id][
                    j + 1] = detections_point_tl[keep_inds_p].astype(
                        np.float32)
                keep_inds_p = (classes_p_br == j)
                top_points_br[image_id][
                    j + 1] = detections_point_br[keep_inds_p].astype(
                        np.float32)
                #print(top_points[image_id][j + 1][0])

            scores = np.hstack([
                top_points_tl[image_id][j][:, 0]
                for j in range(1, categories + 1)
            ])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, categories + 1):
                    keep_inds = (top_points_tl[image_id][j][:, 0] >= thresh)
                    top_points_tl[image_id][j] = top_points_tl[image_id][j][
                        keep_inds]

            scores = np.hstack([
                top_points_br[image_id][j][:, 0]
                for j in range(1, categories + 1)
            ])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, categories + 1):
                    keep_inds = (top_points_br[image_id][j][:, 0] >= thresh)
                    top_points_br[image_id][j] = top_points_br[image_id][j][
                        keep_inds]

            if debug:
                image_file = db.image_file(db_ind)
                image = cv2.imread(image_file)

        detections_point_tl = db.convert_to_coco_points_pure(top_points_tl)
        detections_point_br = db.convert_to_coco_points_pure(top_points_br)
        with open(point_json_tl, "w") as f:
            json.dump(detections_point_tl, f)
        with open(point_json_br, "w") as f:
            json.dump(detections_point_br, f)
    '''
    image_ids = [db.image_ids(ind) for ind in db_inds]
    with open(result_json, "r") as f:
        result_json = json.load(f)
    for cls_type in range(1, categories+1):
        db.evaluate(result_json, [cls_type], image_ids)
    '''
    return 0
Exemple #19
0
def kp_detection(db, k_ind, data_aug, debug):
    ################################################################
    # kp_detection: takes the whole dataset as input,
    # loads a batch of images and annotations from it,
    # and builds the corresponding heatmaps and regression targets
    # from those annotations.
    ################################################################

    # train.py --> train() --> init_parallel_jobs --> for each thread: prefetch_data --> sample_data --> kp_detection
    # input: in training,   db is an MSCOCO instance and the dataset is trainval2014
    #        in validation, db is an MSCOCO instance and the dataset is minival2014
    # k_ind is 0 on the first call; it is then updated inside kp_detection as k_ind = (k_ind + 1) % db_size
    # data_aug is True when training and False when validating
    # debug is set in sample_data; it is False in both cases
    data_rng   = system_configs.data_rng
    # see config.py: data_rng = np.random.RandomState(123)
    batch_size = system_configs.batch_size
    # see CenterNet-104.py: batch_size = 48

    # the db config is defined in the COCO class; its content is listed below:
    # "db": {
    #         "rand_scale_min": 0.6,
    #         "rand_scale_max": 1.4,
    #         "rand_scale_step": 0.1,
    #         "rand_scales": null,
    #
    #         "rand_crop": true,
    #         "rand_color": true,
    #
    #         "border": 128,
    #         "gaussian_bump": true,
    #
    #         "input_size": [511, 511],
    #         "output_sizes": [[128, 128]],
    #
    #         "test_scales": [1],
    #
    #         "top_k": 70,
    #         "categories": 80,
    #         "kp_categories": 1,
    #         "ae_threshold": 0.5,
    #         "nms_threshold": 0.5,
    #
    #         "max_per_image": 100
    #         }
    # the parameters above come from CenterNet-104.py;
    # any parameter not found in CenterNet-104 falls back to db/detection.py

    categories   = db.configs["categories"]#  80
    input_size   = db.configs["input_size"]# [511,511]
    output_size  = db.configs["output_sizes"][0] # [ 128, 128]

    border        = db.configs["border"] # 128
    lighting      = db.configs["lighting"] # from detection.py   lighting  = true
    rand_crop     = db.configs["rand_crop"] # true
    rand_color    = db.configs["rand_color"] # true
    rand_scales   = db.configs["rand_scales"]
    # check CenterNet-104.json
    #         "rand_scale_min": 0.6,
    #         "rand_scale_max": 1.4,
    #         "rand_scale_step": 0.1,
    #         "rand_scales": null,
    # and check detection.py
    #             if self._configs["rand_scales"] is None:
    #             self._configs["rand_scales"] = np.arange(
    #                 self._configs["rand_scale_min"],
    #                 self._configs["rand_scale_max"],
    #                 self._configs["rand_scale_step"]
    #             )
    # so here rand_scales = np.arange(0.6, 1.4, 0.1), i.e. 0.6, 0.7, ..., 1.3 (np.arange excludes the stop value 1.4)
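    # a quick check of the values:
    # >>> np.round(np.arange(0.6, 1.4, 0.1), 1)
    # array([0.6, 0.7, 0.8, 0.9, 1. , 1.1, 1.2, 1.3])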


    gaussian_bump = db.configs["gaussian_bump"] # from detection.py   true
    gaussian_iou  = db.configs["gaussian_iou"] # from detection.py   0.7
    gaussian_rad  = db.configs["gaussian_radius"] # from detection.py  -1

    max_tag_len = 128

    # allocating memory
    images      = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32)
                            #  48     ,3  ,    511,           511
    tl_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
                            #  48     ,     80    ,      128      ,      128
    br_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
                            #  48     ,     80    ,      128      ,      128
    ct_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
                            #  48     ,     80    ,      128      ,      128
    tl_regrs    = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
                            #  48     ,     128    , 2
    br_regrs    = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
                            #  48     ,     128    , 2
    ct_regrs    = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
                            #  48     ,     128    , 2
    tl_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks   = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens    = np.zeros((batch_size, ), dtype=np.int32)
                            #    48   ,

    db_size = db.db_inds.size
    # back to db/coco.py to check db.db_inds
    # self._db_inds = np.arange(len(self._image_ids))
    # so db_size is the number of images in the dataset, e.g. 10000 images gives db_size = 10000
    for b_ind in range(batch_size): # iterate images one by one
        if not debug and k_ind == 0:
            db.shuffle_inds()
            # debug is always False when this function is called, for both training and validation
            # k_ind is 0 on the first call to kp_detection and again whenever it wraps around db_size,
            # so the indices are reshuffled once per full pass over the dataset (once per epoch)
            # the shuffle_inds() method is defined in base.py

        db_ind = db.db_inds[k_ind]
        # db_inds is reshuffled whenever k_ind wraps to 0; the sample index is then taken using k_ind
        k_ind  = (k_ind + 1) % db_size
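        # e.g. with db_size = 5, k_ind runs 1, 2, 3, 4, 0, 1, ...; each wrap to 0 triggers a reshuffle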

        # reading image
        image_file = db.image_file(db_ind)
        image      = cv2.imread(image_file)

        # reading detections
        detections = db.detections(db_ind)
        # db is an MSCOCO instance, and MSCOCO.detections is defined in db/coco.py
        # in train.py, MSCOCO is initialized and MSCOCO._detections is filled with all annotation information
        # here db.detections(db_ind) takes db_ind, the id of an image,
        # and uses that id to look up the annotations of the image,
        # so detections holds the label information of a single image

        # cropping an image randomly
        if not debug and rand_crop:
            image, detections = random_crop(image, detections, rand_scales, input_size, border=border)
            # the image is cropped and the detection boxes are adjusted at the same time
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        # resize the image and its detections to input_size at the same time.
        # after resizing/cropping, some boxes may extend past the image boundaries.
        detections = _clip_detections(image, detections)
        # clipping removes that risk:
        # every detection is forced back within the image boundaries


        width_ratio  = output_size[1] / input_size[1]
        height_ratio = output_size[0] / input_size[0]

        # input_size and output_size can be found in CenterNet-104.json
        # input_size = [511, 511]
        # output_size = [128, 128]
        # so width_ratio = 128 / 511 ≈ 0.2505 (output size over input size)

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width    = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        images[b_ind] = image.transpose((2, 0, 1))
        # transpose the image to channel-first (C, H, W) layout

        for ind, detection in enumerate(detections):
            # all the operations below handle one single image
            # the code below scales detections by the output/input ratio,
            # so the coordinates must be absolute pixel values, not normalized to (0, 1)
            category = int(detection[-1]) - 1
            #category = 0

            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]
            xct, yct = (detection[2] + detection[0])/2., (detection[3]+detection[1])/2.

            fxtl = (xtl * width_ratio)
            fytl = (ytl * height_ratio)
            fxbr = (xbr * width_ratio)
            fybr = (ybr * height_ratio)
            fxct = (xct * width_ratio)
            fyct = (yct * height_ratio)

            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)
            xct = int(fxct)
            yct = int(fyct)

            if gaussian_bump: # CenterNet-104.json sets this to true
                width  = detection[2] - detection[0] # box size in input-image coordinates
                height = detection[3] - detection[1]

                width  = math.ceil(width * width_ratio) # scale by the ratio so it matches the output size
                height = math.ceil(height * height_ratio)

                if gaussian_rad == -1:# -1 means compute the gaussian radius automatically
                    # this matches the CenterNet-104 setting
                    radius = gaussian_radius((height, width), gaussian_iou)
                    # gaussian_iou = 0.7
                    radius = max(0, int(radius)) # e.g. a 50x80 bounding box gives a radius of about 17
                else:
                    radius = gaussian_rad

                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct], radius, delte = 5)
                # all three heatmaps are initialized as zeros with shape (48, 80, 128, 128)
                # tl_heatmaps[b_ind, category] is a 128 x 128 map;
                # the top-left corner, the bottom-right corner and the center keypoint each get their own heatmap
                # about the delte parameter: top-left and bottom-right both use the default of 6,
                # while the center heatmap uses 5
                # in draw_gaussian, sigma = diameter / delte, so a smaller delte gives a larger sigma:
                # the center Gaussian is wider than the corner ones, and pixels around the
                # center keypoint keep higher heatmap values
            else:
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1
                ct_heatmaps[b_ind, category, yct, xct] = 1
                # if-branch: draw a Gaussian bump; else-branch: set a single one-hot peak at the keypoint

            tag_ind                      = tag_lens[b_ind]
            # tag_lens has shape (batch_size,)
            # and b_ind is the image index within the batch
            # tag_lens[b_ind] counts how many detections the image has so far;
            # see tag_lens[b_ind] += 1 a few lines below
            tl_regrs[b_ind, tag_ind, :]  = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :]  = [fxbr - xbr, fybr - ybr]
            ct_regrs[b_ind, tag_ind, :]  = [fxct - xct, fyct - yct]
            # all three regression arrays are 3-dimensional:
            # (batch_size, max_tag_len, 2)
            # for example, one batch holds 48 images,
            # and each image has a different number of detections: maybe the first image has 4
            # and the second has 15.
            # but the network forward pass needs tensors of a fixed shape,
            # which is why the arrays are preallocated and zero-padded:
            # ct_regrs    = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
            tl_tags[b_ind, tag_ind]      = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind]      = ybr * output_size[1] + xbr
            ct_tags[b_ind, tag_ind]      = yct * output_size[1] + xct
            # these 3 arrays are used together with the three regression arrays above:
            # the tags store the flattened integer pixel index (y * output_size[1] + x)
            # on the output feature map, while the regrs above store the fractional parts
            # lost by the int() cast
            # ct_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
            tag_lens[b_ind]             += 1
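            # a toy example of the encoding: fxct = 57.38, fyct = 101.91 on the 128x128 output gives
            # xct = 57, yct = 101, ct_regrs = [0.38, 0.91] and ct_tags = 101 * 128 + 57 = 12985;
            # decoding is x = tag % 128, y = tag // 128, plus the stored offsets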

    for b_ind in range(batch_size):
        # for each image in the batch
        tag_len = tag_lens[b_ind] # how many detections this image has
        tag_masks[b_ind, :tag_len] = 1
        # tag_masks marks the first tag_len slots as valid;
        # it was initialized at the beginning of this method as
        # tag_masks   = np.zeros((batch_size, max_tag_len), dtype=np.uint8)


    images      = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    tl_regrs    = torch.from_numpy(tl_regrs)
    br_regrs    = torch.from_numpy(br_regrs)
    ct_regrs    = torch.from_numpy(ct_regrs)
    tl_tags     = torch.from_numpy(tl_tags)
    br_tags     = torch.from_numpy(br_tags)
    ct_tags     = torch.from_numpy(ct_tags)
    tag_masks   = torch.from_numpy(tag_masks)

    return {
        "xs": [images, tl_tags, br_tags, ct_tags],
        "ys": [tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs, br_regrs, ct_regrs]
    }, k_ind
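A minimal, self-contained sketch (not from the repository) of the tag/offset encoding used above: the flattened tag stores the integer cell on the 128x128 output map and the regression entry stores the fractional remainder, so the sub-pixel keypoint can be recovered exactly. encode_keypoint/decode_keypoint and the sample coordinates are made up for illustration.

import numpy as np

def encode_keypoint(fx, fy, output_w):
    x, y = int(fx), int(fy)                 # integer cell on the output feature map
    tag = y * output_w + x                  # flattened index, as in tl/br/ct_tags
    regr = np.array([fx - x, fy - y])       # fractional offset, as in tl/br/ct_regrs
    return tag, regr

def decode_keypoint(tag, regr, output_w):
    y, x = divmod(tag, output_w)
    return x + regr[0], y + regr[1]         # sub-pixel position restored

tag, regr = encode_keypoint(57.38, 101.91, output_w=128)
assert np.allclose(decode_keypoint(tag, regr, 128), (57.38, 101.91))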
Exemple #20
0
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    partial_num = 3000
    db_inds = db.db_inds[:partial_num] if debug else db.db_inds

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    seq_length = db.configs["max_query_len"]
    bert_model = db.configs["bert_model"]
    textdim = 768 if bert_model == 'bert-base-uncased' else 1024

    top_bboxes = {}
    best_bboxes = {}
    for ind in tqdm(range(db_inds.size), ncols=80, desc="locating kps"):

        db_ind = db_inds[ind]
        image_file = db.images[db_ind][0]

        image, bert_feature, gt_detections, phrase = db.detections_with_phrase(
            db_ind)

        height, width = image.shape[0:2]

        detections = []
        center_points = []
        tl_hms = []
        br_hms = []
        ct_hms = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127
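            # (n | 127) + 1 rounds n up to the next multiple of 128, so the padded input is 128-aligned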

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            bert_features = np.zeros((1, textdim), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            bert_features[0] = bert_feature

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            # Flip to perform detection twice
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            bert_features = np.concatenate((bert_features, bert_features),
                                           axis=0)

            images = torch.from_numpy(images)
            bert_features = torch.from_numpy(bert_features)
            dets, center, heatmaps = decode_func(nnet, [images, bert_features],
                                                 K,
                                                 ae_threshold=ae_threshold,
                                                 kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)

            tl_hm, br_hm, ct_hm = heatmaps

            _rescale_dets(dets, ratios, borders, sizes)
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]],
                    0,
                    sizes[:, 1][:, None, None],
                    out=center[..., [0]])
            np.clip(center[..., [1]],
                    0,
                    sizes[:, 0][:, None, None],
                    out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale

            if scale == 1:
                center_points.append(center)
                tl_hms.append(tl_hm)
                br_hms.append(br_hm)
                ct_hms.append(ct_hm)
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)
        tl_hms = np.concatenate(tl_hms, axis=1)
        br_hms = np.concatenate(br_hms, axis=1)
        ct_hms = np.concatenate(ct_hms, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]
        tl_hms = tl_hms[0]
        br_hms = br_hms[0]
        ct_hms = ct_hms[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]

        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]

        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)

        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

        s_temp_score = copy.copy(s_detections[:, 4])
        s_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   s_detections[:, -1][np.newaxis, :]) == 0
        ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_s_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_s_new_score],
            axis=0)
        s_detections[:, 4][ind_s_new_score] = (
            s_temp_score[ind_s_new_score] * 2 +
            center_points[index_s_new_score, 3]) / 3

        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   l_detections[:, -1][np.newaxis, :]) == 0
        ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_l_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_l_new_score],
            axis=0)
        l_detections[:, 4][ind_l_new_score] = (
            l_temp_score[ind_l_new_score] * 2 +
            center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]

        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[db_ind] = {}

        top_bboxes[db_ind] = detections[:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(top_bboxes[db_ind],
                           Nt=nms_threshold,
                           method=nms_algorithm,
                           weight_exp=weight_exp)
        else:
            soft_nms(top_bboxes[db_ind],
                     Nt=nms_threshold,
                     method=nms_algorithm)
        top_bboxes[db_ind] = top_bboxes[db_ind][:, 0:5]

        scores = top_bboxes[db_ind][:, -1]
        if scores is not None and len(scores) > 0:
            best_bboxes[db_ind] = top_bboxes[db_ind][np.argmax(scores)]
        else:
            best_bboxes[db_ind] = None

        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            keep_inds = (top_bboxes[db_ind][:, -1] >= thresh)
            top_bboxes[db_ind] = top_bboxes[db_ind][keep_inds]

        if debug:
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]
            fig, ax = plt.subplots(figsize=(28, 12))

            ax = plt.subplot(152)
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            if best_bboxes[db_ind] is not None:
                bbox = best_bboxes[db_ind].astype(np.int32)
                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]
                ax.add_patch(
                    plt.Rectangle((xmin, ymin),
                                  xmax - xmin,
                                  ymax - ymin,
                                  fill=False,
                                  edgecolor='red',
                                  linewidth=5.0))
                ax.text(xmin + 1,
                        ymin - 3,
                        'prediction',
                        bbox=dict(facecolor='red', ec='black', lw=2,
                                  alpha=0.5),
                        fontsize=15,
                        color='white',
                        weight='bold')

            ax = plt.subplot(151)
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            bbox = gt_detections[0].astype(np.int32)
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            ax.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              fill=False,
                              edgecolor='red',
                              linewidth=5.0))
            ax.text(xmin + 1,
                    ymin - 3,
                    phrase,
                    bbox=dict(facecolor='red', ec='black', lw=2, alpha=0.5),
                    fontsize=15,
                    color='white',
                    weight='bold')

            ax = plt.subplot(153)
            ax.imshow(tl_hms[0], cmap='jet')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            ax = plt.subplot(154)
            ax.imshow(br_hms[0], cmap='jet')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            ax = plt.subplot(155)
            ax.imshow(ct_hms[0], cmap='jet')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            # debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))
            debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            # plt.savefig(debug_file1)
            plt.savefig(debug_file2)
            plt.close()

    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_json(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    db.evaluate(best_bboxes)
    return 0
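The test loop above runs each scale on the image and its horizontal flip stacked along the batch axis, then mirrors the boxes predicted on the flipped view back into the original frame (x1, x2 become out_width - x2, out_width - x1). A minimal numpy sketch with toy shapes and one made-up detection:

import numpy as np

images = np.random.rand(1, 3, 8, 8).astype(np.float32)
# stack the horizontally flipped copy along the batch axis, as in the loop above
images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)

# a box predicted on the flipped view is mirrored back:
out_width = 8
dets = np.array([[[1.0, 2.0, 5.0, 6.0, 0.9]]])   # one toy (x1, y1, x2, y2, score)
dets[0, :, [0, 2]] = out_width - dets[0, :, [2, 0]]
print(dets)   # x1, x2 become out_width - x2, out_width - x1 -> [3., 2., 7., 6., 0.9]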
Exemple #21
0
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
    num_images = db_inds.size

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]

        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        height, width = image.shape[0:2]

        detections = []
        center_points = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets, center = decode_func(nnet,
                                       images,
                                       K,
                                       ae_threshold=ae_threshold,
                                       kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)

            _rescale_dets(dets, ratios, borders, sizes)
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]],
                    0,
                    sizes[:, 1][:, None, None],
                    out=center[..., [0]])
            np.clip(center[..., [1]],
                    0,
                    sizes[:, 0][:, None, None],
                    out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale

            if scale == 1:
                center_points.append(center)
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]

        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]

        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)

        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

        s_temp_score = copy.copy(s_detections[:, 4])
        s_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   s_detections[:, -1][np.newaxis, :]) == 0
        ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_s_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_s_new_score],
            axis=0)
        s_detections[:, 4][ind_s_new_score] = (
            s_temp_score[ind_s_new_score] * 2 +
            center_points[index_s_new_score, 3]) / 3

        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   l_detections[:, -1][np.newaxis, :]) == 0
        ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_l_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_l_new_score],
            axis=0)
        l_detections[:, 4][ind_l_new_score] = (
            l_temp_score[ind_l_new_score] * 2 +
            center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]

        #for i in range(detections.shape[0]):
        #   box_width = detections[i,2]-detections[i,0]
        #   box_height = detections[i,3]-detections[i,1]
        #   if box_width*box_height<=22500 and detections[i,4]!=-1:
        #     left_x = (2*detections[i,0]+1*detections[i,2])/3
        #     right_x = (1*detections[i,0]+2*detections[i,2])/3
        #     top_y = (2*detections[i,1]+1*detections[i,3])/3
        #     bottom_y = (1*detections[i,1]+2*detections[i,3])/3
        #     temp_score = copy.copy(detections[i,4])
        #     detections[i,4] = -1
        #     for j in range(center_points.shape[0]):
        #        if (classes[i] == center_points[j,2])and \
        #           (center_points[j,0]>left_x and center_points[j,0]< right_x) and \
        #           ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)):
        #           detections[i,4] = (temp_score*2 + center_points[j,3])/3
        #           break
        #   elif box_width*box_height > 22500 and detections[i,4]!=-1:
        #     left_x = (3*detections[i,0]+2*detections[i,2])/5
        #     right_x = (2*detections[i,0]+3*detections[i,2])/5
        #     top_y = (3*detections[i,1]+2*detections[i,3])/5
        #     bottom_y = (2*detections[i,1]+3*detections[i,3])/5
        #     temp_score = copy.copy(detections[i,4])
        #     detections[i,4] = -1
        #     for j in range(center_points.shape[0]):
        #        if (classes[i] == center_points[j,2])and \
        #           (center_points[j,0]>left_x and center_points[j,0]< right_x) and \
        #           ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)):
        #           detections[i,4] = (temp_score*2 + center_points[j,3])/3
        #           break
        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(
                np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1],
                               Nt=nms_threshold,
                               method=nms_algorithm,
                               weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1],
                         Nt=nms_threshold,
                         method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack(
            [top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        if debug:
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]
            fig, ax = plt.subplots(figsize=(12, 12))
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)
            #bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= 0.4)
                cat_name = db.class_name(j)
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    bbox = bbox[0:4].astype(np.int32)
                    xmin = bbox[0]
                    ymin = bbox[1]
                    xmax = bbox[2]
                    ymax = bbox[3]
                    #if (xmax - xmin) * (ymax - ymin) > 5184:
                    ax.add_patch(
                        plt.Rectangle((xmin, ymin),
                                      xmax - xmin,
                                      ymax - ymin,
                                      fill=False,
                                      edgecolor=colours[j - 1],
                                      linewidth=4.0))
                    ax.text(xmin + 1,
                            ymin - 3,
                            '{:s}'.format(cat_name),
                            bbox=dict(facecolor=colours[j - 1],
                                      ec='black',
                                      lw=2,
                                      alpha=0.5),
                            fontsize=15,
                            color='white',
                            weight='bold')

            debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))
            debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            plt.savefig(debug_file1)
            plt.savefig(debug_file2)
            plt.close()
            #cv2.imwrite(debug_file, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)
    return 0
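The max_per_image cap above keeps only the highest-scoring boxes, using np.partition to find the cutoff score without a full sort. A small self-contained sketch with toy scores and a made-up max_per_image:

import numpy as np

scores = np.array([0.9, 0.2, 0.75, 0.4, 0.6, 0.85])
max_per_image = 3
keep_inds = np.ones_like(scores, dtype=bool)
if len(scores) > max_per_image:
    kth = len(scores) - max_per_image
    thresh = np.partition(scores, kth)[kth]   # the cutoff: (kth + 1)-th smallest score
    keep_inds = scores >= thresh
print(scores[keep_inds])   # [0.9  0.75 0.85] -- exactly the top max_per_image scores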
Exemple #22
0
def kp_detection(db, k_ind, data_aug, debug):
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    max_tag_len = 128

    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    t_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    l_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    b_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    r_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    ct_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    t_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    l_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    b_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    r_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    t_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    l_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    b_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    r_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # reading detections
        detections, extreme_pts = db.detections(db_ind)

        # cropping an image randomly
        if rand_crop:
            image, detections, extreme_pts = random_crop_pts(image,
                                                             detections,
                                                             extreme_pts,
                                                             rand_scales,
                                                             input_size,
                                                             border=border)
        else:
            assert 0
            # image, detections = _full_image_crop(image, detections)

        image, detections, extreme_pts = _resize_image_pts(
            image, detections, extreme_pts, input_size)
        detections, extreme_pts = _clip_detections_pts(image, detections,
                                                       extreme_pts)

        width_ratio = output_size[1] / input_size[1]
        height_ratio = output_size[0] / input_size[0]

        # flipping an image randomly
        if np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1
            extreme_pts[:, :, 0] = width - extreme_pts[:, :, 0] - 1
            extreme_pts[:, 1, :], extreme_pts[:, 3, :] = \
                extreme_pts[:, 3, :].copy(), extreme_pts[:, 1, :].copy()

        image = image.astype(np.float32) / 255.
        if not debug:
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
        normalize_(image, db.mean, db.std)
        images[b_ind] = image.transpose((2, 0, 1))

        for ind, detection in enumerate(detections):
            category = int(detection[-1]) - 1
            extreme_pt = extreme_pts[ind]

            xt, yt = extreme_pt[0, 0], extreme_pt[0, 1]
            xl, yl = extreme_pt[1, 0], extreme_pt[1, 1]
            xb, yb = extreme_pt[2, 0], extreme_pt[2, 1]
            xr, yr = extreme_pt[3, 0], extreme_pt[3, 1]
            xct = (xl + xr) / 2
            yct = (yt + yb) / 2

            fxt = (xt * width_ratio)
            fyt = (yt * height_ratio)
            fxl = (xl * width_ratio)
            fyl = (yl * height_ratio)
            fxb = (xb * width_ratio)
            fyb = (yb * height_ratio)
            fxr = (xr * width_ratio)
            fyr = (yr * height_ratio)
            fxct = (xct * width_ratio)
            fyct = (yct * height_ratio)

            xt = int(fxt)
            yt = int(fyt)
            xl = int(fxl)
            yl = int(fyl)
            xb = int(fxb)
            yb = int(fyb)
            xr = int(fxr)
            yr = int(fyr)
            xct = int(fxct)
            yct = int(fyct)

            if gaussian_bump:
                width = detection[2] - detection[0]
                height = detection[3] - detection[1]

                width = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad
                draw_gaussian(t_heatmaps[b_ind, category], [xt, yt], radius)
                draw_gaussian(l_heatmaps[b_ind, category], [xl, yl], radius)
                draw_gaussian(b_heatmaps[b_ind, category], [xb, yb], radius)
                draw_gaussian(r_heatmaps[b_ind, category], [xr, yr], radius)
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct], radius)
            else:
                t_heatmaps[b_ind, category, yt, xt] = 1
                l_heatmaps[b_ind, category, yl, xl] = 1
                b_heatmaps[b_ind, category, yb, xb] = 1
                r_heatmaps[b_ind, category, yr, xr] = 1

            tag_ind = tag_lens[b_ind]
            t_regrs[b_ind, tag_ind, :] = [fxt - xt, fyt - yt]
            l_regrs[b_ind, tag_ind, :] = [fxl - xl, fyl - yl]
            b_regrs[b_ind, tag_ind, :] = [fxb - xb, fyb - yb]
            r_regrs[b_ind, tag_ind, :] = [fxr - xr, fyr - yr]
            t_tags[b_ind, tag_ind] = yt * output_size[1] + xt
            l_tags[b_ind, tag_ind] = yl * output_size[1] + xl
            b_tags[b_ind, tag_ind] = yb * output_size[1] + xb
            r_tags[b_ind, tag_ind] = yr * output_size[1] + xr
            ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
            tag_lens[b_ind] += 1

    for b_ind in range(batch_size):
        tag_len = tag_lens[b_ind]
        tag_masks[b_ind, :tag_len] = 1

    if debug:
        debugger = Debugger(num_classes=80)
        t_hm = debugger.gen_colormap(t_heatmaps[0])
        l_hm = debugger.gen_colormap(l_heatmaps[0])
        b_hm = debugger.gen_colormap(b_heatmaps[0])
        r_hm = debugger.gen_colormap(r_heatmaps[0])
        ct_hm = debugger.gen_colormap(ct_heatmaps[0])
        img = images[0] * db.std.reshape(3, 1, 1) + db.mean.reshape(3, 1, 1)
        img = (img * 255).astype(np.uint8).transpose(1, 2, 0)
        debugger.add_blend_img(img, t_hm, 't_hm')
        debugger.add_blend_img(img, l_hm, 'l_hm')
        debugger.add_blend_img(img, b_hm, 'b_hm')
        debugger.add_blend_img(img, r_hm, 'r_hm')
        debugger.add_blend_img(
            img, np.maximum(np.maximum(t_hm, l_hm), np.maximum(b_hm, r_hm)),
            'extreme')
        debugger.add_blend_img(img, ct_hm, 'center')
        debugger.show_all_imgs(pause=True)

    images = torch.from_numpy(images)
    t_heatmaps = torch.from_numpy(t_heatmaps)
    l_heatmaps = torch.from_numpy(l_heatmaps)
    b_heatmaps = torch.from_numpy(b_heatmaps)
    r_heatmaps = torch.from_numpy(r_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    t_regrs = torch.from_numpy(t_regrs)
    l_regrs = torch.from_numpy(l_regrs)
    b_regrs = torch.from_numpy(b_regrs)
    r_regrs = torch.from_numpy(r_regrs)
    t_tags = torch.from_numpy(t_tags)
    l_tags = torch.from_numpy(l_tags)
    b_tags = torch.from_numpy(b_tags)
    r_tags = torch.from_numpy(r_tags)
    ct_tags = torch.from_numpy(ct_tags)
    tag_masks = torch.from_numpy(tag_masks)

    return {
        "xs": [images, t_tags, l_tags, b_tags, r_tags, ct_tags],
        "ys": [
            t_heatmaps, l_heatmaps, b_heatmaps, r_heatmaps, ct_heatmaps,
            tag_masks, t_regrs, l_regrs, b_regrs, r_regrs
        ]
    }, k_ind
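When the training image is flipped horizontally (see the block above), all extreme-point x coordinates are mirrored and the left/right extreme points trade places. A toy numpy sketch with made-up coordinates:

import numpy as np

# one object; rows are the (top, left, bottom, right) extreme points, columns are (x, y)
extreme_pts = np.array([[[40., 10.], [20., 30.], [45., 60.], [70., 35.]]])
width = 100
extreme_pts[:, :, 0] = width - extreme_pts[:, :, 0] - 1          # mirror x coordinates
extreme_pts[:, 1, :], extreme_pts[:, 3, :] = \
    extreme_pts[:, 3, :].copy(), extreme_pts[:, 1, :].copy()     # swap left <-> right
print(extreme_pts[0])   # the old right point (now at x = 29) becomes the new left point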
Exemple #23
0
            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, mean, std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets = kp_decode(nnet,
                             images,
                             K,
                             aggr_weight=aggr_weight,
                             scores_thresh=scores_thresh,
                             center_thresh=center_thresh,
                             kernel=nms_kernel,
Exemple #24
0
def apply_detection(image,
                    nnet,
                    scales,
                    decode_func,
                    top_k,
                    avg,
                    std,
                    categories,
                    merge_bbox,
                    max_per_image=100,
                    ae_threshold=0.5,
                    nms_kernel=3,
                    nms_algorithm=2,
                    nms_threshold=0.45,
                    weight_exp=1):

    height, width = image.shape[0:2]

    detections = []
    center_points = []

    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        # n | 127 sets the 7 lowest bits, so (n | 127) + 1 rounds n up to the next multiple of 128
        inp_height = new_height | 127
        inp_width = new_width | 127

        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)

        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width

        resized_image = cv2.resize(image, (new_width, new_height))
        resized_image, border, offset = crop_image(resized_image, new_center,
                                                   [inp_height, inp_width])

        resized_image = resized_image / 255.
        normalize_(resized_image, avg, std)

        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        sizes[0] = [int(height * scale), int(width * scale)]
        ratios[0] = [height_ratio, width_ratio]

        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        # do detection
        dets, center = decode_func(nnet,
                                   images,
                                   top_k,
                                   ae_threshold=ae_threshold,
                                   kernel=nms_kernel)
        # post processing
        dets = dets.reshape(2, -1, 8)
        center = center.reshape(2, -1, 4)
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
        center[1, :, [0]] = out_width - center[1, :, [0]]
        dets = dets.reshape(1, -1, 8)
        center = center.reshape(1, -1, 4)

        _rescale_dets(dets, ratios, borders, sizes)
        center[..., [0]] /= ratios[:, 1][:, None, None]
        center[..., [1]] /= ratios[:, 0][:, None, None]
        center[..., [0]] -= borders[:, 2][:, None, None]
        center[..., [1]] -= borders[:, 0][:, None, None]
        np.clip(center[..., [0]],
                0,
                sizes[:, 1][:, None, None],
                out=center[..., [0]])
        np.clip(center[..., [1]],
                0,
                sizes[:, 0][:, None, None],
                out=center[..., [1]])
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale

        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    detections = np.concatenate(detections, axis=1)
    center_points = np.concatenate(center_points, axis=1)

    classes = detections[..., -1]
    classes = classes[0]
    detections = detections[0]
    center_points = center_points[0]

    valid_ind = detections[:, 4] > -1
    valid_detections = detections[valid_ind]

    box_width = valid_detections[:, 2] - valid_detections[:, 0]
    box_height = valid_detections[:, 3] - valid_detections[:, 1]

    s_ind = (box_width * box_height <= 22500)
    l_ind = (box_width * box_height > 22500)

    s_detections = valid_detections[s_ind]
    l_detections = valid_detections[l_ind]

    s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
    s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
    s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
    s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

    s_temp_score = copy.copy(s_detections[:, 4])
    s_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    s_left_x = s_left_x[np.newaxis, :]
    s_right_x = s_right_x[np.newaxis, :]
    s_top_y = s_top_y[np.newaxis, :]
    s_bottom_y = s_bottom_y[np.newaxis, :]

    ind_lx = (center_x - s_left_x) > 0
    ind_rx = (center_x - s_right_x) < 0
    ind_ty = (center_y - s_top_y) > 0
    ind_by = (center_y - s_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] -
               s_detections[:, -1][np.newaxis, :]) == 0
    ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)),
                             axis=0) == 1
    index_s_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_s_new_score],
        axis=0)
    s_detections[:, 4][ind_s_new_score] = (
        s_temp_score[ind_s_new_score] * 2 +
        center_points[index_s_new_score, 3]) / 3

    l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
    l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
    l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
    l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

    l_temp_score = copy.copy(l_detections[:, 4])
    l_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    l_left_x = l_left_x[np.newaxis, :]
    l_right_x = l_right_x[np.newaxis, :]
    l_top_y = l_top_y[np.newaxis, :]
    l_bottom_y = l_bottom_y[np.newaxis, :]

    ind_lx = (center_x - l_left_x) > 0
    ind_rx = (center_x - l_right_x) < 0
    ind_ty = (center_y - l_top_y) > 0
    ind_by = (center_y - l_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] -
               l_detections[:, -1][np.newaxis, :]) == 0
    ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)),
                             axis=0) == 1
    index_l_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_l_new_score],
        axis=0)
    l_detections[:, 4][ind_l_new_score] = (
        l_temp_score[ind_l_new_score] * 2 +
        center_points[index_l_new_score, 3]) / 3

    detections = np.concatenate([l_detections, s_detections], axis=0)
    detections = detections[np.argsort(-detections[:, 4])]
    classes = detections[..., -1]

    #for i in range(detections.shape[0]):
    #   box_width = detections[i,2]-detections[i,0]
    #   box_height = detections[i,3]-detections[i,1]
    #   if box_width*box_height<=22500 and detections[i,4]!=-1:
    #     left_x = (2*detections[i,0]+1*detections[i,2])/3
    #     right_x = (1*detections[i,0]+2*detections[i,2])/3
    #     top_y = (2*detections[i,1]+1*detections[i,3])/3
    #     bottom_y = (1*detections[i,1]+2*detections[i,3])/3
    #     temp_score = copy.copy(detections[i,4])
    #     detections[i,4] = -1
    #     for j in range(center_points.shape[0]):
    #        if (classes[i] == center_points[j,2])and \
    #           (center_points[j,0]>left_x and center_points[j,0]< right_x) and \
    #           ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)):
    #           detections[i,4] = (temp_score*2 + center_points[j,3])/3
    #           break
    #   elif box_width*box_height > 22500 and detections[i,4]!=-1:
    #     left_x = (3*detections[i,0]+2*detections[i,2])/5
    #     right_x = (2*detections[i,0]+3*detections[i,2])/5
    #     top_y = (3*detections[i,1]+2*detections[i,3])/5
    #     bottom_y = (2*detections[i,1]+3*detections[i,3])/5
    #     temp_score = copy.copy(detections[i,4])
    #     detections[i,4] = -1
    #     for j in range(center_points.shape[0]):
    #        if (classes[i] == center_points[j,2])and \
    #           (center_points[j,0]>left_x and center_points[j,0]< right_x) and \
    #           ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)):
    #           detections[i,4] = (temp_score*2 + center_points[j,3])/3
    #           break
    # reject detections with negative scores
    keep_inds = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    final_bboxes = {}
    for j in range(categories):
        keep_inds = (classes == j)
        final_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(final_bboxes[j + 1],
                           Nt=nms_threshold,
                           method=nms_algorithm,
                           weight_exp=weight_exp)
        else:
            soft_nms(final_bboxes[j + 1],
                     Nt=nms_threshold,
                     method=nms_algorithm)
        final_bboxes[j + 1] = final_bboxes[j + 1][:, 0:5]

    scores = np.hstack(
        [final_bboxes[j][:, -1] for j in range(1, categories + 1)])
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (final_bboxes[j][:, -1] >= thresh)
            final_bboxes[j] = final_bboxes[j][keep_inds]
    return final_bboxes
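The vectorized block above implements CenterNet's central-region check: a detection keeps a fused score only if a center keypoint of the same class falls inside its central region (the middle third for small boxes, the middle fifth for large ones); otherwise its score is set to -1 and it is rejected. A scalar sketch of the same rule, with a hypothetical helper name and toy inputs:

import numpy as np

def rescore_box(box, centers, small_thresh=22500):
    # box: (x1, y1, x2, y2, score, cls); centers: iterable of (x, y, cls, score)
    x1, y1, x2, y2, score, cls = box
    n = 3 if (x2 - x1) * (y2 - y1) <= small_thresh else 5
    lx = ((n + 1) // 2 * x1 + (n - 1) // 2 * x2) / n   # central-region bounds
    rx = ((n - 1) // 2 * x1 + (n + 1) // 2 * x2) / n
    ty = ((n + 1) // 2 * y1 + (n - 1) // 2 * y2) / n
    by = ((n - 1) // 2 * y1 + (n + 1) // 2 * y2) / n
    for cx, cy, c_cls, c_score in centers:
        if c_cls == cls and lx < cx < rx and ty < cy < by:
            return (2 * score + c_score) / 3           # fuse corner and center scores
    return -1                                          # no center inside: box is rejected

box = (0., 0., 100., 100., 0.8, 1)                     # 100x100 is "small" -> middle third
print(rescore_box(box, [(50., 50., 1, 0.9)]))          # 0.8333...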
Exemple #25
0
def kp_detection(db, k_ind, data_aug, debug):
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    max_tag_len = 500

    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    tl_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    br_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    ct_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    ct_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        # reading detections
        detections = db.detections(db_ind)

        # cropping an image randomly
        if not debug and rand_crop:
            image, detections = random_crop(image,
                                            detections,
                                            rand_scales,
                                            input_size,
                                            border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        width_ratio = output_size[1] / input_size[1]
        height_ratio = output_size[0] / input_size[0]

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1
        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        images[b_ind] = image.transpose((2, 0, 1))
        for ind, detection in enumerate(detections):
            category = int(detection[-1]) - 1
            #category = 0

            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]
            xct = (detection[0] + detection[2]) / 2.
            yct = (detection[1] + detection[3]) / 2.

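            # project corners and center into heatmap resolution; keep the
            # fractional values so the int() quantization error can be regressed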
            fxtl = (xtl * width_ratio)
            fytl = (ytl * height_ratio)
            fxbr = (xbr * width_ratio)
            fybr = (ybr * height_ratio)
            fxct = (xct * width_ratio)
            fyct = (yct * height_ratio)

            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)
            xct = int(fxct)
            yct = int(fyct)

            if gaussian_bump:
                width = detection[2] - detection[0]
                height = detection[3] - detection[1]

                width = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad

                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct],
                              radius,
                              delte=5)  # 'delte' (sic) is the keyword spelling expected by this repo's draw_gaussian

            else:
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1
                ct_heatmaps[b_ind, category, yct, xct] = 1

            tag_ind = tag_lens[b_ind]
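            # offset targets: the sub-pixel remainder lost by the int() casts above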
            tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
            ct_regrs[b_ind, tag_ind, :] = [fxct - xct, fyct - yct]
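            # tag targets: row-major flattened heatmap indices (y * W + x)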
            tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
            ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
            tag_lens[b_ind] += 1

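    # mark the filled slots; the zero padding beyond tag_len stays masked out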
    for b_ind in range(batch_size):
        tag_len = tag_lens[b_ind]
        tag_masks[b_ind, :tag_len] = 1

    images = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    tl_regrs = torch.from_numpy(tl_regrs)
    br_regrs = torch.from_numpy(br_regrs)
    ct_regrs = torch.from_numpy(ct_regrs)
    tl_tags = torch.from_numpy(tl_tags)
    br_tags = torch.from_numpy(br_tags)
    ct_tags = torch.from_numpy(ct_tags)
    tag_masks = torch.from_numpy(tag_masks)
    return {
        "xs": [images, tl_tags, br_tags, ct_tags],
        "ys": [
            tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs,
            br_regrs, ct_regrs
        ]
    }, k_ind
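The heatmap targets above are built by splatting a clipped 2-D Gaussian at each corner/center point and keeping the element-wise maximum where bumps overlap. A minimal sketch, assuming draw_gaussian follows the standard CornerNet-style recipe in which the 'delte' keyword divides the diameter to obtain sigma (the name draw_gaussian_sketch is illustrative, not the repo's function):

import numpy as np

def draw_gaussian_sketch(heatmap, center, radius, k=1, delte=6):
    diameter = 2 * radius + 1
    sigma = diameter / delte
    y, x = np.ogrid[-radius:radius + 1, -radius:radius + 1]
    gaussian = np.exp(-(x * x + y * y) / (2 * sigma * sigma))

    cx, cy = center
    h, w = heatmap.shape
    # clip the Gaussian patch wherever it sticks out past the heatmap border
    left, right = min(cx, radius), min(w - cx, radius + 1)
    top, bottom = min(cy, radius), min(h - cy, radius + 1)
    region = heatmap[cy - top:cy + bottom, cx - left:cx + right]
    patch = gaussian[radius - top:radius + bottom, radius - left:radius + right]
    # element-wise max keeps the strongest bump where nearby objects overlap
    np.maximum(region, patch * k, out=region)
    return heatmap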
Exemple #26
0
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
    num_images = db_inds.size

    K             = db.configs["top_k"]
    ae_threshold  = db.configs["ae_threshold"]
    nms_kernel    = db.configs["nms_kernel"]
    
    scales        = db.configs["test_scales"]
    weight_exp    = db.configs["weight_exp"]
    merge_bbox    = db.configs["merge_bbox"]
    categories    = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1, 
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    max_height = 600
    max_width = 1000
    detections = []
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]

        image_file = db.image_file(db_ind)
        ori_image = cv2.imread(image_file)
        ori_height, ori_width = ori_image.shape[0:2]
        print("image name: %s, width: %d, height: %d" % (image_file, ori_width, ori_height))
        height = min(max_height, ori_height)
        width = min(max_width, ori_width)
        input_image = cv2.resize(ori_image, (width, height))

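        # x | 127 rounds up to the next value of the form 128k - 1, so that
        # inp_height + 1 and inp_width + 1 are multiples of 128 and divide
        # cleanly by the output stride of 4 (see out_height/out_width below)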
        inp_height = max_height | 127
        inp_width = max_width | 127
        input_image_full = cv2.resize(ori_image, (inp_width, inp_height))
        images = np.zeros((2, 3, inp_height, inp_width), dtype=np.float32)

        input_image = input_image / 255.
        normalize_(input_image, db.mean, db.std)
        input_image_full = input_image_full / 255.
        normalize_(input_image_full, db.mean, db.std)
        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4

        images[0, :, 0:height, 0:width] = input_image.transpose((2, 0, 1))
        images[1] = input_image_full.transpose((2, 0, 1))
        images = torch.from_numpy(images)
        detections_tl, detections_br, flag = decode_func(
            nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)

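        # image 0 (padded-crop branch, column 0 of the coords): undo the output
        # stride, then the resize from (width, height) back to the original image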
        ratio_height = inp_height / out_height * ori_height / height
        ratio_width = inp_width / out_width * ori_width / width
        detections_tl[3, 0] *= ratio_width
        detections_tl[4, 0] *= ratio_height
        detections_br[3, 0] *= ratio_width
        detections_br[4, 0] *= ratio_height

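        # image 1 (full-resize branch, column 1): undo the output stride and the
        # full-image resize in one combined ratio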
        ratio_height = inp_height / out_height * ori_height / inp_height
        ratio_width = inp_width / out_width * ori_width / inp_width
        detections_tl[3, 1] *= ratio_width
        detections_tl[4, 1] *= ratio_height
        detections_br[3, 1] *= ratio_width
        detections_br[4, 1] *= ratio_height

        if flag:
            detections.append([detections_tl, detections_br])

    result_pickle = os.path.join(result_dir, "results_points.pickle")
    with open(result_pickle, "wb") as f:
        pickle.dump(detections, f)

    return 0
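A hedged usage sketch for consuming the pickle written above; "results" stands in for whatever result_dir was passed, and the per-image [detections_tl, detections_br] layout (rows 3 and 4 holding x/y already mapped back to the original image) follows the code above:

import os
import pickle

result_dir = "results"  # placeholder for the actual result_dir
with open(os.path.join(result_dir, "results_points.pickle"), "rb") as f:
    detections = pickle.load(f)

for detections_tl, detections_br in detections:
    print("top-left corners:", detections_tl.shape,
          "bottom-right corners:", detections_br.shape)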