Example #1
0
    def gt_db(self, is_train):
        """Build the ground-truth window database, or load it from cache.

        If ``<cache_path>/<name>_gt_db.pkl`` exists it is unpickled and
        returned as-is. Otherwise the annotations are parsed with
        ``self.parse_gt_file``, converted into one dict per image, written to
        that cache file and returned.

        :param is_train: when true, annotations are read from
            ``<dataset_path>/TRAIN/<benchmark_name>``; otherwise from
            ``<dataset_path>/TEST``.
        :return: list of dicts with keys ``image``, ``left_top``,
            ``left_bottom``, ``right_bottom``, ``right_top``, ``center``,
            ``windows`` (reshaped to ``(num_windows, 4, 2)``), ``im_width``
            and ``im_height``.
        """
        # NOTE(review): the cache name does not depend on ``is_train``, so
        # ``self.name`` presumably already distinguishes the splits -- confirm.
        cache_file = os.path.join(self.cache_path, self.name + '_gt_db.pkl')
        if os.path.exists(cache_file):
            # NOTE: unpickling is only safe for trusted, locally written caches.
            with open(cache_file, 'rb') as fid:
                db = pickle.load(fid)
            print('{} gt db loaded from {}, {} samples are loaded'.format(
                self.name, cache_file, len(db)))
            return db

        if is_train:
            gt_root = os.path.join(self.dataset_path, 'TRAIN',
                                   self.benchmark_name)
        else:
            gt_root = os.path.join(self.dataset_path, 'TEST')
        img_list, window_anno_list, shape_list = self.parse_gt_file(gt_root)

        # Flip to True to visualise every parsed sample.
        debug = False

        gt_db = []
        for n_img, image_path in enumerate(img_list):
            # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the
            # dtype that alias used to resolve to.
            the_sample_window = np.array(window_anno_list[n_img],
                                         dtype=np.float64)
            im_height, im_width, _ = shape_list[n_img]

            # Skip images annotated with more windows than max_num_windows.
            if len(the_sample_window) > max_num_windows:
                continue

            # Each row holds four coordinate pairs, stored in this order.
            left_top = the_sample_window[:, 0:2].copy()
            left_bottom = the_sample_window[:, 2:4].copy()
            right_bottom = the_sample_window[:, 4:6].copy()
            right_top = the_sample_window[:, 6:8].copy()
            # Mean of the four corner points of every window.
            center = the_sample_window.reshape((-1, 4, 2)).mean(axis=1)

            the_sample_window = the_sample_window.reshape(
                (the_sample_window.shape[0], 4, 2))

            gt_db.append({
                'image': image_path,
                'left_top': left_top,
                'left_bottom': left_bottom,
                'right_bottom': right_bottom,
                'right_top': right_top,
                'center': center,
                'windows': np.array(the_sample_window),
                'im_width': im_width,
                'im_height': im_height,
            })

            if debug:
                debug_vis(image_path,
                          (left_top, left_bottom, right_bottom, right_top),
                          'Dataset Parsing')

        with open(cache_file, 'wb') as fid:
            pickle.dump(gt_db, fid, pickle.HIGHEST_PROTOCOL)
        print('{} samples ared wrote {}'.format(len(gt_db), cache_file))

        return gt_db
Example #2
0
def get_single_patch_sample(img_path, windows, flip_pairs, patch_width, patch_height, mean, std,
                            do_augment, aug_config, label_func, label_config):
    """Load one image and produce its network input patch, label and gt locations.

    :param img_path: path of the image to read with OpenCV.
    :param windows: indexable collection of 2-D point arrays; every point is
        mapped through the patch transform (the arrays are written in place).
    :param flip_pairs: forwarded to ``fliplr_label`` when the sample is flipped.
    :param patch_width: width of the generated patch.
    :param patch_height: height of the generated patch.
    :param mean: per-channel mean used for normalization, or ``None``.
    :param std: per-channel std used for normalization, or ``None``.
    :param do_augment: when true, augmentation parameters come from
        ``do_augmentation(aug_config)``; otherwise neutral values are used.
    :param aug_config: augmentation config; ``use_color_normalize`` is read here.
    :param label_func: callable invoked as
        ``label_func(label_config, patch_width, patch_height, windows)``.
    :param label_config: label config; ``useAE`` and ``feat_stride`` are read here.
    :return: tuple ``(img_patch_tensor, label, gt_loc)``.
    :raises IOError: if the image cannot be read.
    """
    # Read the image; anything that is not an ndarray means the read failed.
    cvimg = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    if not isinstance(cvimg, np.ndarray):
        raise IOError("Fail to read %s" % img_path)

    # Shape of the untouched source image.
    img_height, img_width, img_channels = cvimg.shape

    # Augmentation parameters (neutral defaults when augmentation is off).
    if do_augment:
        scale, rot, center, do_flip, color_scale = do_augmentation(aug_config)
    else:
        scale = 1.0
        rot = 0
        center = np.zeros(2)
        do_flip = False
        color_scale = [1.0, 1.0, 1.0]

    # Crop/warp the patch and keep the transform for the annotations.
    img_patch_cv, trans = generate_patch_image_cv(
        cvimg.copy(), img_width, img_height, patch_width, patch_height,
        do_flip, [scale, rot, center])
    img_patch_tensor = convert_cvimg_to_tensor(img_patch_cv)

    # Per-channel colour scaling, then optional mean/std normalization.
    for n_c in range(img_channels):
        scaled = img_patch_tensor[n_c, :, :] * color_scale[n_c]
        img_patch_tensor[n_c, :, :] = np.clip(scaled, 0, 255)
        if aug_config.use_color_normalize and mean is not None and std is not None:
            centered = img_patch_tensor[n_c, :, :] - mean[n_c]
            img_patch_tensor[n_c, :, :] = centered / std[n_c]

    # Mirror the annotations when the patch itself was mirrored.
    if do_flip:
        windows = fliplr_label(windows, img_width, flip_pairs)

    # Map every annotated point into patch coordinates.
    for pts in windows:
        for n_jt in range(len(pts)):
            pts[n_jt, :] = trans_point2d(pts[n_jt, :], trans)

    # Build the training label from the transformed annotations.
    label = label_func(label_config, patch_width, patch_height, windows)

    # Ground-truth locations: a zero placeholder unless the AE option is on.
    if label_config.useAE:
        stacked = np.transpose(np.array(windows[0: 4]), (1, 0, 2))
        stacked = (stacked / label_config.feat_stride + 0.5).astype(int)
        gt_loc = KeypointsRef(stacked, patch_height // label_config.feat_stride)
    else:
        gt_loc = np.zeros((10, 4, 2))

    show_debug = False
    if show_debug:
        debug_vis(img_patch_cv, windows, label=label, raw_img = cvimg)

    return img_patch_tensor, label, gt_loc
Example #3
0
    def dt_db(self, det_bbox_src):
        """Return the sample database with boxes replaced by detector output.

        If ``<cache_path>/<name>_keypoint_dt_db_sample<N>.pkl`` exists it is
        unpickled and returned. Otherwise the ground-truth database
        ``<name>_keypoint_db_sample<N>.pkl`` is loaded and the ``center_x``,
        ``center_y``, ``width`` and ``height`` of every entry are overwritten
        from ``<cache_path>/detection/<det_bbox_src>/kpts_bbox.pkl``. ``N`` is
        the sample count reported by ``self._sample_dataset``.

        Sets ``self.detector`` and ``self.num_sample_single``; when the
        database is rebuilt, also sets ``self.mean_bone_length``.

        :param det_bbox_src: detector name, used as the detection sub-folder.
        :return: list of per-sample dicts.
        :raises AssertionError: if the ground-truth cache is missing or the
            number of detected boxes differs from the number of samples.
        """
        print("Using Detector:", det_bbox_src)

        self.detector = det_bbox_src
        # Only the sample count is needed; it is part of the cache file names.
        _, sample_num, _, _, _ = self._sample_dataset(self.image_set_name)

        dt_cache_file = os.path.join(
            self.cache_path,
            self.name + '_keypoint_dt_db_sample' + str(sample_num) + '.pkl')
        if os.path.exists(dt_cache_file):
            # NOTE: unpickling is only safe for trusted, locally written caches.
            with open(dt_cache_file, 'rb') as fid:
                dt_db = pk.load(fid)
            print('{} gt db loaded from {}, {} samples are loaded'.format(
                self.name, dt_cache_file, len(dt_db)))
            # Keep the sample count in sync on the cached path as well.
            # NOTE(review): mean_bone_length is still not refreshed here, and
            # nothing in this method ever writes dt_cache_file -- confirm
            # which component is expected to produce it.
            self.num_sample_single = len(dt_db)
            return dt_db

        gt_cache_file = os.path.join(
            self.cache_path,
            self.name + '_keypoint_db_sample' + str(sample_num) + '.pkl')

        # Raise explicitly: a bare ``assert`` disappears under ``python -O``.
        if not os.path.exists(gt_cache_file):
            raise AssertionError(gt_cache_file + ' not exist...')
        with open(gt_cache_file, 'rb') as fid:
            gt_db = pk.load(fid)
        print('{} gt db loaded from {}, {} samples are loaded'.format(
            self.name, gt_cache_file, len(gt_db)))

        self.num_sample_single = len(gt_db)
        self.mean_bone_length = np.asarray(
            [item['bone_len'] for item in gt_db]).mean()

        # update bbox using detection result
        print("Updating BBox from detector")
        bbox_file = os.path.join(self.cache_path, 'detection', det_bbox_src,
                                 'kpts_bbox.pkl')
        with open(bbox_file, 'rb') as fid:
            bbox_list = pk.load(fid)

        if len(bbox_list) != len(gt_db):
            raise AssertionError(
                'bbox count {} does not match sample count {}'.format(
                    len(bbox_list), len(gt_db)))

        # Flip to True to visualise every updated box.
        debug = False
        for sample, box in zip(gt_db, bbox_list):
            # box[0:4] is used as two opposite corners of the detection.
            center_x = (box[0] + box[2]) * 0.5
            center_y = (box[1] + box[3]) * 0.5

            width = box[2] - box[0]
            height = box[3] - box[1]

            # Grow the shorter side so that width / height == aspect_ratio.
            if width > self.aspect_ratio * height:
                height = width * 1.0 / self.aspect_ratio
            elif width < self.aspect_ratio * height:
                width = height * self.aspect_ratio

            # Enlarge the box by 25% on both axes.
            width = width * 1.25
            height = height * 1.25

            sample['center_x'] = center_x
            sample['center_y'] = center_y
            sample['width'] = width
            sample['height'] = height

            if debug:
                debug_vis(os.path.join(sample['image']),
                          [center_x, center_y, width, height], [])

        return gt_db
Example #4
0
    def dt_db(self, det_bbox_src):
        """Build the detector-bbox database, or load it from cache.

        Collects the image path and the detected bounding box of every image
        into one dict per image. Only the image and bbox fields carry data;
        the joint-related fields are zero placeholders.

        The result is cached in
        ``<cache_path>/<name>_bbox_dt_<det_bbox_src>_db.pkl``. Sets
        ``self.detector`` and ``self.num_sample_single``.

        :param det_bbox_src: detector name; selects the per-image files under
            ``<dataset_path>/<image_set_name>/detection/<det_bbox_src>/``.
        :return: list of per-image dicts.
        :raises AssertionError: if a detection file does not hold exactly one box.
        """
        self.detector = det_bbox_src
        cache_file = os.path.join(
            self.cache_path,
            '{}_bbox_dt_{}_db.pkl'.format(self.name, self.detector))

        if os.path.exists(cache_file):
            # NOTE: unpickling is only safe for trusted, locally written caches.
            with open(cache_file, 'rb') as fid:
                db = pk.load(fid)
            print('{} gt db loaded from {}, {} samples are loaded'.format(self.name, cache_file, len(db)))
            # Identity test: ``db != None`` is element-wise for NumPy arrays.
            if db is not None:
                self.num_sample_single = len(db)
                return db

        # Flip to True to visualise every box.
        debug = False

        dt_db = []
        # Image and detection files are numbered from 1 with five digits.
        for img_no in range(1, len(self.image_names) + 1):
            img_path = os.path.join(self.dataset_path, self.image_set_name, 'IMG', '%05d.jpg' % img_no)
            bbox_file = os.path.join(self.dataset_path, self.image_set_name,
                                     'detection', self.detector, '%05d.pkl' % img_no)

            with open(bbox_file, 'rb') as fid:
                bbox = pk.load(fid)

            # Raise explicitly: a bare ``assert`` disappears under ``python -O``.
            if len(bbox) != 1:
                raise AssertionError("Cannot be %d bbox for image %s" % (len(bbox), img_path))

            # box[0:4] is used as two opposite corners, box[4] as the score.
            box = bbox[0]
            center_x = (box[0] + box[2]) * 0.5
            center_y = (box[1] + box[3]) * 0.5
            score = box[4]

            width = box[2] - box[0]
            height = box[3] - box[1]

            # Grow the shorter side so that width / height == aspect_ratio.
            if width > self.aspect_ratio * height:
                height = width * 1.0 / self.aspect_ratio
            elif width < self.aspect_ratio * height:
                width = height * self.aspect_ratio

            # Enlarge the box by 10% on both axes.
            width = width * 1.1
            height = height * 1.1

            dt_db.append({
                'image': img_path,
                'flip_pairs': self.flip_pairs,
                'parent_ids': self.parent_ids,

                # Joint fields are placeholders; no pose is known here.
                'joints_3d': np.zeros((18, 3)),
                'joints_3d_vis': np.zeros((18, 3)),
                'joints_3d_relative': np.zeros((18, 3)),
                'bone_len': 0,

                # bbox
                'center_x': center_x,
                'center_y': center_y,
                'width': width,
                'height': height,
                'score': score,
            })

            if debug:
                debug_vis(img_path, [center_x, center_y, width, height], [])

        with open(cache_file, 'wb') as fid:
            pk.dump(dt_db, fid, pk.HIGHEST_PROTOCOL)
        print('{} samples ared wrote {}'.format(len(dt_db), cache_file))

        self.num_sample_single = len(dt_db)
        return dt_db
Example #5
0
def jnt_bbox_db_core(name, cache_path, dataset_path, image_set_name, image_names, joint_num, aspect_ratio,
                     flip_pairs, parent_ids):
    """Build the joint/bbox database, or load it from cache.

    For 'Train' and 'Val' each ground-truth pose is aligned to the predicted
    2-D pose with a similarity transform, the alignment parameters are
    recorded, a bounding box is generated around the aligned pose and the
    skeleton length is computed. For 'Test' only the bounding box around the
    visible predicted keypoints is generated; the pose fields are zero
    placeholders.

    The result is cached in ``<cache_path>/<name>_keypoint_jnt_bbox_db.pkl``.

    :param name: dataset name, used in the cache file name.
    :param cache_path: directory holding the cache file.
    :param dataset_path: dataset root directory.
    :param image_set_name: 'Train', 'Val' or 'Test'.
    :param image_names: image list; only its length is used.
    :param joint_num: joint count of the annotation; when it equals
        ``s_36_jt_num`` a thorax joint is appended to the loaded pose.
    :param aspect_ratio: target width / height ratio of the bounding boxes.
    :param flip_pairs: stored unchanged in every entry.
    :param parent_ids: stored unchanged in every entry.
    :return: list of per-image dicts.
    :raises ValueError: if an image has to be processed for an unsupported
        ``image_set_name``.
    """
    cache_file = os.path.join(cache_path, '{}_keypoint_jnt_bbox_db.pkl'.format(name))

    if os.path.exists(cache_file):
        # NOTE: unpickling is only safe for trusted, locally written caches.
        with open(cache_file, 'rb') as fid:
            db = pk.load(fid)
        print('{} gt db loaded from {}, {} samples are loaded'.format(name, cache_file, len(db)))
        # Identity test: ``db != None`` is element-wise for NumPy arrays.
        if db is not None:
            return db

    img_pred_file = '{}_{}samples_img_res.pkl'.format('HM36_eccv_challenge_' + image_set_name, len(image_names))
    img_pred_file = os.path.join(dataset_path, image_set_name, img_pred_file)
    with open(img_pred_file, 'rb') as fid:
        img_pred = pk.load(fid)

    # Flip to True to visualise every sample.
    debug = False

    dt_db = []
    for idx in range(len(image_names)):
        # Image and pose files are numbered from 1 with five digits.
        img_path = os.path.join(dataset_path, image_set_name, 'IMG', '%05d.jpg' % (idx + 1))
        pred_pose_in_img_wz_score = img_pred[idx]["kpts"]  # 18x3, already in hm36 skeleton structure
        pred_pose_vis = img_pred[idx]["vis"]

        # Rows of the joints flagged visible in the prediction.
        visible = np.where(pred_pose_vis[:, 0] > 0)[0]

        if image_set_name == 'Test':
            # No ground truth: only a bbox around the visible keypoints.
            u, d, l, r = calc_kpt_bound(pred_pose_in_img_wz_score[visible, 0:2], pred_pose_vis[visible, 0:2])
            # Separate arrays so the three stored fields never alias.
            align_joints_2d_wz = np.zeros((18, 3))
            joints_2d_vis = np.zeros((18, 3))
            gtPose = np.zeros((18, 3))
            skeleton_length = s = rot = t = 0
        elif image_set_name in ['Train', 'Val']:
            gt_file = os.path.join(dataset_path, image_set_name, 'POSE', '%05d.csv' % (idx + 1))
            gtPose = genfromtxt(gt_file, delimiter=',')

            # Add the thorax as the midpoint of the two shoulder joints.
            if joint_num == s_36_jt_num:
                thorax = (gtPose[s_36_lsh_jt_idx] + gtPose[s_36_rsh_jt_idx]) * 0.5
                thorax = thorax.reshape((1, 3))
                gtPose = np.concatenate((gtPose, thorax), axis=0)
            assert len(gtPose) == s_36_jt_num, "#Joint Must be 18, Now #Joint %d" % len(gtPose)

            # Align the ground truth to the prediction on visible joints only.
            target_pose = pred_pose_in_img_wz_score[visible, 0:2]
            from_pose = gtPose[visible, 0:2]
            _, _, rot, s, t = compute_similarity_transform(target_pose, from_pose, compute_optimal_scale=True)

            # Apply the transform to x/y and the same scale to the third column.
            align_joints_2d_wz = s * gtPose[:, 0:2].dot(rot) + t
            align_joints_2d_wz = np.concatenate((align_joints_2d_wz, gtPose[:, 2:3] * s), axis=1)

            # ``np.float`` was removed in NumPy 1.24; use the concrete dtype.
            joints_2d_vis = np.ones(align_joints_2d_wz.shape, dtype=np.float64)

            skeleton_length = calc_total_skeleton_length(gtPose, s_36_parent_ids)

            u, d, l, r = calc_kpt_bound(align_joints_2d_wz, joints_2d_vis)
        else:
            # Previously this fell through to an UnboundLocalError below.
            raise ValueError(
                "Unsupported image_set_name %r, expected 'Train', 'Val' or 'Test'" % (image_set_name,))

        center_x = (l + r) * 0.5
        center_y = (u + d) * 0.5
        assert center_x >= 1

        w = r - l
        h = d - u
        assert w > 0
        assert h > 0

        # Grow the shorter side so that w / h == aspect_ratio.
        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio

        # Enlarge the box by 25% on both axes.
        w *= 1.25
        h *= 1.25

        dt_db.append({
            'image': img_path,
            'flip_pairs': flip_pairs,
            'parent_ids': parent_ids,

            # pose
            'joints_3d': align_joints_2d_wz,
            'joints_3d_vis': joints_2d_vis,
            'joints_3d_relative': gtPose,
            'bone_len': skeleton_length,

            # bbox
            'center_x': center_x,
            'center_y': center_y,
            'width': w,
            'height': h,

            # align
            's': s,
            'rot': rot,
            't': t,
        })

        if debug:
            debug_vis(img_path, [center_x, center_y, w, h], [align_joints_2d_wz, joints_2d_vis])

    with open(cache_file, 'wb') as fid:
        pk.dump(dt_db, fid, pk.HIGHEST_PROTOCOL)
    print('{} samples ared wrote {}'.format(len(dt_db), cache_file))

    return dt_db
Example #6
0
    def gt_db(self):
        """Build the ground-truth keypoint database, or load it from cache.

        If ``<cache_path>/<name>_keypoint_db_v3.pkl`` exists it is unpickled
        and returned. Otherwise ``<dataset_path>/annot/<image_set_name>.json``
        is parsed into one dict per annotation, written to that cache file
        and returned.

        :return: list of dicts with keys ``image``, ``center_x``,
            ``center_y``, ``width``, ``height``, ``flip_pairs``,
            ``parent_ids``, ``joints_3d`` and ``joints_3d_vis``.
        :raises AssertionError: if an annotation centre has x < 1, if the
            square box is narrower than the patch aspect ratio, or if the
            joint count differs from ``self.joint_num``.
        """
        cache_file = os.path.join(self.cache_path,
                                  self.name + '_keypoint_db_v3' + '.pkl')
        if os.path.exists(cache_file):
            # NOTE: unpickling is only safe for trusted, locally written caches.
            with open(cache_file, 'rb') as fid:
                db = pk.load(fid)
            print('{} gt db loaded from {}, {} samples are loaded'.format(
                self.name, cache_file, len(db)))
            return db

        anno_path = os.path.join(self.dataset_path, 'annot',
                                 self.image_set_name + '.json')
        with open(anno_path) as anno_file:
            anno = json.load(anno_file)

        # Flip to True to visualise every sample.
        debug = False

        gt_db = []
        for a in anno:
            # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the
            # dtype that alias used to resolve to.
            c = np.array(a['center'], dtype=np.float64)
            c_x = c[0]
            c_y = c[1]
            assert c_x >= 1
            # Shift the annotated centre by one pixel on both axes.
            c_x = c_x - 1
            c_y = c_y - 1
            s = np.array([a['scale'], a['scale']], dtype=np.float64)
            width = s[0]
            height = s[1]
            # Adjust center/scale slightly to avoid cropping limbs, this is
            # the common practice on mpii dataset.
            c_y = c_y + 15 * height

            width = width * 1.25 * self.pixel_std
            height = height * 1.25 * self.pixel_std

            # The box starts square; shrink its width to the patch ratio.
            if width / height >= 1.0 * self.patch_width / self.patch_height:
                width = 1.0 * height * self.patch_width / self.patch_height
            else:
                # Raise explicitly: ``assert 0`` disappears under ``python -O``.
                raise AssertionError("Error. Invalid patch width and height")

            # Joints and visibility stay all-zero for the 'test' split.
            jts_3d = np.zeros((self.joint_num, 3), dtype=np.float64)
            jts_3d_vis = np.zeros((self.joint_num, 3), dtype=np.float64)
            if self.image_set_name != 'test':
                jts = np.array(a['joints'])
                jts[:, 0:2] = jts[:, 0:2] - 1
                jts_vis = np.array(a['joints_vis'])
                assert len(
                    jts) == self.joint_num, 'joint num diff: {} vs {}'.format(
                        len(jts), self.joint_num)
                jts_3d[:, 0:2] = jts[:, 0:2]
                # The same visibility flag is stored for x and y.
                jts_3d_vis[:, 0] = jts_vis[:]
                jts_3d_vis[:, 1] = jts_vis[:]

            img_path = os.path.join(self.dataset_path, 'images', a['image'])
            gt_db.append({
                'image': img_path,
                'center_x': c_x,
                'center_y': c_y,
                'width': width,
                'height': height,
                'flip_pairs': self.flip_pairs,
                'parent_ids': self.parent_ids,
                'joints_3d': jts_3d,
                'joints_3d_vis': jts_3d_vis,
            })

            if debug:
                debug_vis(img_path, [c_x, c_y, width, height],
                          [jts_3d, jts_3d_vis])

        with open(cache_file, 'wb') as fid:
            pk.dump(gt_db, fid, pk.HIGHEST_PROTOCOL)
        print('{} samples ared wrote {}'.format(len(gt_db), cache_file))

        return gt_db
Example #7
0
    def jnt_bbox_db(self):
        """Build the keypoint database with joint-derived boxes, or load it from cache.

        If ``<cache_path>/<name>_keypoint_jntBBox_db.pkl`` exists it is
        unpickled and returned. Otherwise
        ``<dataset_path>/annot/<image_set_name>.json`` is parsed; for every
        annotation whose visibility flags sum to at least 2 a bounding box is
        derived from ``calc_kpt_bound`` and one dict is stored. The result is
        written to the cache file and returned.

        :return: list of dicts with keys ``image``, ``center_x``,
            ``center_y``, ``width``, ``height``, ``flip_pairs``,
            ``parent_ids``, ``joints_3d`` and ``joints_3d_vis``.
        :raises AssertionError: if the joint count differs from
            ``self.joint_num`` or the derived box fails the sanity checks.
        """
        cache_file = os.path.join(self.cache_path,
                                  self.name + '_keypoint_jntBBox_db.pkl')
        if os.path.exists(cache_file):
            # NOTE: unpickling is only safe for trusted, locally written caches.
            with open(cache_file, 'rb') as fid:
                db = pk.load(fid)
            print('{} gt db loaded from {}, {} samples are loaded'.format(
                self.name, cache_file, len(db)))
            return db

        anno_path = os.path.join(self.dataset_path, 'annot',
                                 self.image_set_name + '.json')
        with open(anno_path) as anno_file:
            anno = json.load(anno_file)

        # Flip to True to visualise every sample.
        debug = False

        gt_db = []
        for a in anno:
            # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the
            # dtype that alias used to resolve to.
            jts_3d = np.zeros((self.joint_num, 3), dtype=np.float64)
            jts_3d_vis = np.zeros((self.joint_num, 3), dtype=np.float64)
            if self.image_set_name != 'test':
                jts = np.array(a['joints'])
                jts[:, 0:2] = jts[:, 0:2] - 1
                jts_vis = np.array(a['joints_vis'])
                assert len(
                    jts) == self.joint_num, 'joint num diff: {} vs {}'.format(
                        len(jts), self.joint_num)
                jts_3d[:, 0:2] = jts[:, 0:2]
                # The same visibility flag is stored for x and y.
                jts_3d_vis[:, 0] = jts_vis[:]
                jts_3d_vis[:, 1] = jts_vis[:]

            # Skip samples whose visibility flags sum to less than 2. For the
            # 'test' split the flags stay zero, so every sample is skipped.
            if np.sum(jts_3d_vis[:, 0]) < 2:
                continue

            u, d, l, r = calc_kpt_bound(jts_3d, jts_3d_vis)
            center = np.array([(l + r) * 0.5, (u + d) * 0.5], dtype=np.float32)
            c_x = center[0]
            c_y = center[1]
            assert c_x >= 1

            w = r - l
            h = d - u

            assert w > 0
            assert h > 0

            # Grow the shorter side so that w / h == aspect_ratio.
            if w > self.aspect_ratio * h:
                h = w * 1.0 / self.aspect_ratio
            elif w < self.aspect_ratio * h:
                w = h * self.aspect_ratio

            # Enlarge the box by 25% on both axes.
            width = w * 1.25
            height = h * 1.25

            img_path = os.path.join(self.dataset_path, 'images', a['image'])
            gt_db.append({
                'image': img_path,
                'center_x': c_x,
                'center_y': c_y,
                'width': width,
                'height': height,
                'flip_pairs': self.flip_pairs,
                'parent_ids': self.parent_ids,
                'joints_3d': jts_3d,
                'joints_3d_vis': jts_3d_vis,
            })

            if debug:
                debug_vis(img_path, [c_x, c_y, width, height],
                          [jts_3d, jts_3d_vis])

        with open(cache_file, 'wb') as fid:
            pk.dump(gt_db, fid, pk.HIGHEST_PROTOCOL)
        print('{} samples ared wrote {}'.format(len(gt_db), cache_file))

        return gt_db