Example 1
0
def test_net(tester, dets, det_range, gpu_id):
    """Estimate keypoints for the human detections in ``dets[det_range[0]:det_range[1]]``.

    Detections are assumed grouped so that consecutive entries sharing the same
    ``'image_id'`` belong to one image; each image's detections are processed in
    mini-batches of ``cfg.test_batch_size``.

    Args:
        tester: model wrapper exposing ``predict_one([imgs]) -> [heatmaps]``.
        dets: list of detection dicts (each with at least ``'image_id'``;
            COCO entries also carry ``'score'``).
        det_range: ``(start, end)`` slice of ``dets`` handled by this worker.
        gpu_id: worker index; selects the tqdm progress-bar slot.

    Returns:
        List of per-person result dicts whose schema depends on ``cfg.dataset``.

    Raises:
        ValueError: if ``cfg.dataset`` is not COCO, PoseTrack, or MPII.
    """
    dump_results = []

    img_start = det_range[0]
    img_id = 0
    img_id2 = 0
    pbar = tqdm(total=det_range[1] - img_start - 1, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))
    while img_start < det_range[1]:
        # Advance img_end past every detection belonging to the same image.
        img_end = img_start + 1
        im_info = dets[img_start]
        while img_end < det_range[1] and dets[img_end]['image_id'] == im_info[
                'image_id']:
            img_end += 1

        # all human detection results of a certain image
        cropped_data = dets[img_start:img_end]

        pbar.update(img_end - img_start)
        img_start = img_end

        # (person, keypoint, [x, y, score]) for this image.
        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # cluster human detection results with test_batch_size
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            imgs = []
            crop_infos = []
            for i in range(start_id, end_id):
                img, crop_info = generate_batch(cropped_data[i], stage='test')
                imgs.append(img)
                crop_infos.append(crop_info)
            imgs = np.array(imgs)
            crop_infos = np.array(crop_infos)

            # forward
            heatmap = tester.predict_one([imgs])[0]

            if cfg.flip_test:
                # Horizontally flip the input, predict again, un-flip the
                # heatmaps, and average with the original prediction.
                flip_imgs = imgs[:, :, ::-1, :]
                flip_heatmap = tester.predict_one([flip_imgs])[0]

                flip_heatmap = flip_heatmap[:, :, ::-1, :]
                # Swap left/right keypoint channels for the mirrored image.
                for (q, w) in cfg.kps_symmetry:
                    flip_heatmap_w = flip_heatmap[:, :, :, w].copy()
                    flip_heatmap_q = flip_heatmap[:, :, :, q].copy()
                    flip_heatmap[:, :, :, q] = flip_heatmap_w
                    flip_heatmap[:, :, :, w] = flip_heatmap_q
                # Compensate the one-pixel shift introduced by flipping.
                flip_heatmap[:, :, 1:, :] = flip_heatmap.copy()[:, :, 0:-1, :]
                heatmap += flip_heatmap
                heatmap /= 2

            # for each human detection from clustered batch
            for image_id in range(start_id, end_id):

                for j in range(cfg.num_kps):
                    hm_j = heatmap[image_id - start_id, :, :, j]
                    idx = hm_j.argmax()
                    y, x = np.unravel_index(idx, hm_j.shape)

                    # Quarter-pixel refinement: nudge the argmax toward the
                    # direction of the local gradient.
                    px = int(math.floor(x + 0.5))
                    py = int(math.floor(y + 0.5))
                    if 1 < px < cfg.output_shape[
                            1] - 1 and 1 < py < cfg.output_shape[0] - 1:
                        diff = np.array([
                            hm_j[py][px + 1] - hm_j[py][px - 1],
                            hm_j[py + 1][px] - hm_j[py - 1][px]
                        ])
                        diff = np.sign(diff)
                        x += diff[0] * .25
                        y += diff[1] * .25
                    # Scale heatmap coordinates up to network input resolution.
                    kps_result[image_id, j, :2] = (x * cfg.input_shape[1] /
                                                   cfg.output_shape[1],
                                                   y * cfg.input_shape[0] /
                                                   cfg.output_shape[0])
                    kps_result[image_id, j, 2] = hm_j.max() / 255

                # Optional per-person crop visualization (disabled by default).
                vis = False
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] -
                                                        crop_info[1])
                # BUG FIX: the threshold belongs inside np.any(); the original
                # compared a bool against 0.9 (np.any(a) > 0.9).
                if vis and np.any(kps_result[image_id, :, 2] > 0.9) \
                        and area > 96**2:
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]
                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg, tmpkps)
                    cv2.imwrite(
                        osp.join(cfg.vis_dir,
                                 str(img_id) + '_output.jpg'), _tmpimg)
                    img_id += 1

                # map back to original images
                for j in range(cfg.num_kps):
                    ci = crop_infos[image_id - start_id]
                    kps_result[image_id, j, 0] = (
                        kps_result[image_id, j, 0] / cfg.input_shape[1] *
                        (ci[2] - ci[0]) + ci[0])
                    kps_result[image_id, j, 1] = (
                        kps_result[image_id, j, 1] / cfg.input_shape[0] *
                        (ci[3] - ci[1]) + ci[1])

                area_save[image_id] = (crop_infos[image_id - start_id][2] -
                                       crop_infos[image_id - start_id][0]) * (
                                           crop_infos[image_id - start_id][3] -
                                           crop_infos[image_id - start_id][1])

        # Optional whole-image visualization (disabled by default).
        vis = False
        if vis and np.any(kps_result[:, :, 2] > 0.9):
            tmpimg = cv2.imread(
                os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]
                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            img_id2 += 1

        score_result = np.copy(kps_result[:, :, 2])
        kps_result[:, :, 2] = 1
        kps_result = kps_result.reshape(-1, cfg.num_kps * 3)

        # rescoring and oks nms
        if cfg.dataset == 'COCO':
            # Person score = mean of confident keypoint scores x detector score.
            rescored_score = np.zeros((len(score_result)))
            for i in range(len(score_result)):
                score_mask = score_result[i] > cfg.score_thr
                if np.sum(score_mask) > 0:
                    rescored_score[i] = np.mean(
                        score_result[i][score_mask]) * cropped_data[i]['score']
            score_result = rescored_score
            keep = oks_nms(kps_result, score_result, area_save,
                           cfg.oks_nms_thr)
            if len(keep) > 0:
                kps_result = kps_result[keep, :]
                score_result = score_result[keep]
                area_save = area_save[keep]
        elif cfg.dataset == 'PoseTrack':
            keep = oks_nms(kps_result, np.mean(score_result, axis=1),
                           area_save, cfg.oks_nms_thr)
            if len(keep) > 0:
                kps_result = kps_result[keep, :]
                score_result = score_result[keep, :]
                area_save = area_save[keep]

        # save result
        for i in range(len(kps_result)):
            if cfg.dataset == 'COCO':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              score=float(round(score_result[i], 4)),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'PoseTrack':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              track_id=0,
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'MPII':
                result = dict(image_id=im_info['image_id'],
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            else:
                # BUG FIX: previously an unknown dataset silently re-appended a
                # stale `result` (or raised NameError on the first person).
                raise ValueError('unsupported dataset: %s' % cfg.dataset)

            dump_results.append(result)

    pbar.close()
    return dump_results
Example 2
0
def test_net(tester, input_pose, det_range, gpu_id):
    """Refine the input poses in ``input_pose[det_range[0]:det_range[1]]``.

    Pose-refinement variant: the model consumes an image crop plus an initial
    pose (coordinates + validity mask) and directly predicts refined keypoint
    coordinates. Entries sharing the same ``'image_id'`` must be consecutive;
    each image's poses are processed in mini-batches of ``cfg.test_batch_size``.

    Args:
        tester: model wrapper exposing
            ``predict_one([imgs, pose_coords, pose_valids]) -> (coord, heatmaps)``.
        input_pose: list of input-pose dicts (each with at least ``'image_id'``).
        det_range: ``(start, end)`` slice of ``input_pose`` for this worker.
        gpu_id: worker index; selects the tqdm progress-bar slot.

    Returns:
        List of per-person result dicts whose schema depends on ``cfg.dataset``.

    Raises:
        ValueError: if ``cfg.dataset`` is not COCO, PoseTrack, or MPII.
    """
    dump_results = []
    img_start = det_range[0]
    img_id = 0
    img_id2 = 0
    pbar = tqdm(total=det_range[1] - img_start - 1, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))
    while img_start < det_range[1]:
        # Advance img_end past every entry belonging to the same image.
        img_end = img_start + 1
        im_info = input_pose[img_start]
        while img_end < det_range[1] and input_pose[img_end][
                'image_id'] == im_info['image_id']:
            img_end += 1

        # all human detection results of a certain image
        cropped_data = input_pose[img_start:img_end]
        pbar.update(img_end - img_start)

        img_start = img_end

        # (person, keypoint, [x, y, score]) for this image.
        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # cluster human detection results with test_batch_size
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            imgs = []
            input_pose_coords = []
            input_pose_valids = []
            input_pose_scores = []
            crop_infos = []

            for i in range(start_id, end_id):
                img, input_pose_coord, input_pose_valid, input_pose_score, crop_info = generate_batch(
                    cropped_data[i], stage='test')
                imgs.append(img)
                input_pose_coords.append(input_pose_coord)
                input_pose_valids.append(input_pose_valid)
                input_pose_scores.append(input_pose_score)
                crop_infos.append(crop_info)

            imgs = np.array(imgs)
            input_pose_coords = np.array(input_pose_coords)
            input_pose_valids = np.array(input_pose_valids)
            input_pose_scores = np.array(input_pose_scores)
            crop_infos = np.array(crop_infos)

            # forward
            coord, heatmaps = tester.predict_one(
                [imgs, input_pose_coords, input_pose_valids])

            if cfg.flip_test:
                # Mirror image, input pose coords, and validity mask; predict;
                # un-mirror the output coords; average with the original.
                flip_imgs = imgs[:, :, ::-1, :]
                flip_input_pose_coords = input_pose_coords.copy()
                flip_input_pose_coords[:, :, 0] = cfg.input_shape[
                    1] - 1 - flip_input_pose_coords[:, :, 0]
                flip_input_pose_valids = input_pose_valids.copy()
                for (q, w) in cfg.kps_symmetry:
                    # Swap left/right keypoints for the mirrored input.
                    coords_w = flip_input_pose_coords[:, w, :].copy()
                    coords_q = flip_input_pose_coords[:, q, :].copy()
                    flip_input_pose_coords[:, q, :] = coords_w
                    flip_input_pose_coords[:, w, :] = coords_q
                    valids_w = flip_input_pose_valids[:, w].copy()
                    valids_q = flip_input_pose_valids[:, q].copy()
                    flip_input_pose_valids[:, q] = valids_w
                    flip_input_pose_valids[:, w] = valids_q

                flip_coord = tester.predict_one([
                    flip_imgs, flip_input_pose_coords, flip_input_pose_valids
                ])[0]

                flip_coord[:, :,
                           0] = cfg.input_shape[1] - 1 - flip_coord[:, :, 0]
                for (q, w) in cfg.kps_symmetry:
                    flip_coord_w = flip_coord[:, w, :].copy()
                    flip_coord_q = flip_coord[:, q, :].copy()
                    flip_coord[:, q, :] = flip_coord_w
                    flip_coord[:, w, :] = flip_coord_q
                coord += flip_coord
                coord /= 2

            # for each human detection from clustered batch
            for image_id in range(start_id, end_id):

                kps_result[image_id, :, :2] = coord[image_id - start_id]
                # Keypoint confidences are carried over from the input pose.
                kps_result[image_id, :,
                           2] = input_pose_scores[image_id - start_id]

                vis = True
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] -
                                                        crop_info[1])
                # BUG FIX: the threshold belongs inside np.any(); the original
                # `np.any(a) > 0.9` compared a bool against 0.9, so with
                # vis=True this branch fired for any nonzero score.
                if vis and np.any(kps_result[image_id, :, 2] > 0.9) \
                        and area > 96**2:
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]

                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg,
                                                tmpkps,
                                                kp_thresh=0.05)
                    cv2.imwrite(
                        osp.join(cfg.vis_dir,
                                 str(img_id) + '_output.jpg'), _tmpimg)

                    save_mergedHeatmaps(heatmaps[image_id - start_id],
                                        osp.join(cfg.vis_dir,
                                                 str(img_id) + '_hm.jpg'),
                                        softmax=True)
                    img_id += 1

                # map back to original images
                for j in range(cfg.num_kps):
                    ci = crop_infos[image_id - start_id]
                    kps_result[image_id, j, 0] = (
                        kps_result[image_id, j, 0] / cfg.input_shape[1] *
                        (ci[2] - ci[0]) + ci[0])
                    kps_result[image_id, j, 1] = (
                        kps_result[image_id, j, 1] / cfg.input_shape[0] *
                        (ci[3] - ci[1]) + ci[1])

                area_save[image_id] = (crop_infos[image_id - start_id][2] -
                                       crop_infos[image_id - start_id][0]) * (
                                           crop_infos[image_id - start_id][3] -
                                           crop_infos[image_id - start_id][1])

        # Optional whole-image visualization (disabled by default).
        vis = False
        if vis and np.any(kps_result[:, :, 2] > 0.9):
            tmpimg = cv2.imread(
                os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]
                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            img_id2 += 1

        # oks nms
        if cfg.dataset in ['COCO', 'PoseTrack']:
            # NMS runs on the keypoint subset not listed in cfg.ignore_kps.
            nms_kps = np.delete(kps_result, cfg.ignore_kps, 1)
            nms_score = np.mean(nms_kps[:, :, 2], axis=1)
            nms_kps[:, :, 2] = 1
            nms_kps = nms_kps.reshape(len(kps_result), -1)
            nms_sigmas = np.delete(cfg.kps_sigmas, cfg.ignore_kps)
            keep = oks_nms(nms_kps, nms_score, area_save, cfg.oks_nms_thr,
                           nms_sigmas)
            if len(keep) > 0:
                kps_result = kps_result[keep, :, :]
                area_save = area_save[keep]

        score_result = np.copy(kps_result[:, :, 2])
        kps_result[:, :, 2] = 1
        kps_result = kps_result.reshape(-1, cfg.num_kps * 3)

        # save result
        for i in range(len(kps_result)):
            if cfg.dataset == 'COCO':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              score=float(round(np.mean(score_result[i]), 4)),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'PoseTrack':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              track_id=0,
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'MPII':
                result = dict(image_id=im_info['image_id'],
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            else:
                # BUG FIX: previously an unknown dataset silently re-appended a
                # stale `result` (or raised NameError on the first person).
                raise ValueError('unsupported dataset: %s' % cfg.dataset)

            dump_results.append(result)

    pbar.close()
    return dump_results