def draw_skeleton(image, kp_2d, dataset='common', unnormalize=True, thickness=2):
    # NOTE: modifies kp_2d in place when unnormalizing/thresholding
    if unnormalize:
        kp_2d[:, :2] = normalize_2d_kp(kp_2d[:, :2], 224, inv=True)

    kp_2d[:, 2] = kp_2d[:, 2] > 0.3  # threshold confidence into a 0/1 visibility flag
    kp_2d = np.array(kp_2d, dtype=int)

    rcolor = get_colors()['red'].tolist()
    pcolor = get_colors()['green'].tolist()
    lcolor = get_colors()['blue'].tolist()

    # look up the skeleton definition for the requested dataset
    # (getattr does the same lookup as the eval-based original, without string evaluation)
    skeleton = getattr(kp_utils, f'get_{dataset}_skeleton')()

    # left/right assignment per bone for the 'common' skeleton
    common_lr = [0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0]

    for idx, pt in enumerate(kp_2d):
        if pt[2] > 0:  # if visible
            cv2.circle(image, (pt[0], pt[1]), 4, pcolor, -1)
            # cv2.putText(image, f'{idx}', (pt[0] + 1, pt[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 255, 0))

    for i, (j1, j2) in enumerate(skeleton):
        if kp_2d[j1, 2] > 0 and kp_2d[j2, 2] > 0:  # if both endpoints visible
            if dataset == 'common':
                color = rcolor if common_lr[i] == 0 else lcolor
            else:
                color = lcolor if i % 2 == 0 else rcolor
            pt1 = (kp_2d[j1, 0], kp_2d[j1, 1])
            pt2 = (kp_2d[j2, 0], kp_2d[j2, 1])
            cv2.line(image, pt1=pt1, pt2=pt2, color=color, thickness=thickness)

    return image
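# A minimal smoke test for draw_skeleton, assuming numpy and the helpers
# above (normalize_2d_kp, get_colors, kp_utils) are importable. The canvas,
# keypoints, and joint count (14 for the 'common' format) are synthetic
# placeholders, not values from this repo.
import numpy as np

canvas = np.zeros((224, 224, 3), dtype=np.uint8)
kp_2d = np.random.uniform(-1, 1, (14, 3)).astype(np.float32)
kp_2d[:, 2] = 1.0  # mark every joint as confidently visible (above the 0.3 threshold)

# draw_skeleton mutates both arguments, hence the defensive copies
out = draw_skeleton(canvas.copy(), kp_2d.copy(), dataset='common', unnormalize=True)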
def get_single_item(self, index):
    start_index, end_index = self.vid_indices[index]

    # read everything for this window while the h5 file handle is open
    with h5py.File(self.h5_file, 'r') as db:
        self.db = db

        kp_2d = self.db['joints2D'][start_index:end_index + 1]
        kp_2d = convert_kps(kp_2d, src='insta', dst='spin')
        kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)

        input = torch.from_numpy(self.db['features'][start_index:end_index + 1]).float()

        vid_name = self.db['vid_name'][start_index:end_index + 1]
        frame_id = self.db['frame_id'][start_index:end_index + 1].astype(str)
        instance_id = np.array([v.decode('ascii') + f for v, f in zip(vid_name, frame_id)])

    for idx in range(self.seqlen):
        kp_2d[idx, :, :2] = normalize_2d_kp(kp_2d[idx, :, :2], 224)
        kp_2d_tensor[idx] = kp_2d[idx]

    target = {
        'features': input,
        'kp_2d': torch.from_numpy(kp_2d_tensor).float(),  # 2D keypoints transformed according to bbox cropping
        # 'instance_id': instance_id,
    }
    return target
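# The loaders above and below all pass pixel coordinates through
# normalize_2d_kp. A standalone sketch of the assumed mapping (a linear
# rescale of [0, size] to [-1, 1], inverted with inv=True, as in
# VIBE-style code); the function name here is a local stand-in:
import numpy as np

def normalize_2d_kp_sketch(kp_2d, size=224, inv=False):
    if not inv:
        return 2.0 * kp_2d / size - 1.0   # pixels -> [-1, 1]
    return (kp_2d + 1.0) * size / 2.0     # [-1, 1] -> pixels

pts = np.array([[0.0, 112.0], [224.0, 56.0]])
norm = normalize_2d_kp_sketch(pts)             # [[-1.0, 0.0], [1.0, -0.5]]
back = normalize_2d_kp_sketch(norm, inv=True)  # round-trips to pts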
def get_single_item(self, index):
    start_index, end_index = self.vid_indices[index]

    kp_2d = self.db['joints2D'][start_index:end_index + 1]
    if self.dataset_name != 'posetrack':
        kp_2d = convert_kps(kp_2d, src=self.dataset_name, dst='spin')
    kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)

    bbox = self.db['bbox'][start_index:end_index + 1]
    input = torch.from_numpy(self.db['features'][start_index:end_index + 1]).float()

    for idx in range(self.seqlen):
        # crop image and transform 2d keypoints
        kp_2d[idx, :, :2], trans = transfrom_keypoints(
            kp_2d=kp_2d[idx, :, :2],
            center_x=bbox[idx, 0],
            center_y=bbox[idx, 1],
            width=bbox[idx, 2],
            height=bbox[idx, 3],
            patch_width=224,
            patch_height=224,
            do_augment=False,
        )

        kp_2d[idx, :, :2] = normalize_2d_kp(kp_2d[idx, :, :2], 224)
        kp_2d_tensor[idx] = kp_2d[idx]

    vid_name = self.db['vid_name'][start_index:end_index + 1]
    frame_id = self.db['img_name'][start_index:end_index + 1].astype(str)
    instance_id = np.array([v + f for v, f in zip(vid_name, frame_id)])

    target = {
        'features': input,
        'kp_2d': torch.from_numpy(kp_2d_tensor).float(),  # 2D keypoints transformed according to bbox cropping
        # 'instance_id': instance_id,
    }

    if self.debug:
        from lib.data_utils.img_utils import get_single_image_crop

        vid_name = self.db['vid_name'][start_index]

        if self.dataset_name == 'pennaction':
            vid_folder = 'frames'
            vid_name = vid_name.split('/')[-1].split('.')[0]
            img_id = 'img_name'
        elif self.dataset_name == 'posetrack':
            vid_folder = osp.join('images', vid_name.split('/')[-2])
            vid_name = vid_name.split('/')[-1].split('.')[0]
            img_id = 'img_name'
        else:
            vid_name = '_'.join(vid_name.split('_')[:-1])
            vid_folder = 'imageFiles'
            img_id = 'frame_id'

        f = osp.join(self.folder, vid_folder, vid_name)
        video_file_list = [osp.join(f, x) for x in sorted(os.listdir(f)) if x.endswith('.jpg')]
        frame_idxs = self.db[img_id][start_index:end_index + 1]

        if self.dataset_name == 'pennaction' or self.dataset_name == 'posetrack':
            video = frame_idxs
        else:
            video = [video_file_list[i] for i in frame_idxs]

        video = torch.cat(
            [get_single_image_crop(image, bbox).unsqueeze(0) for image, bbox in zip(video, bbox)],
            dim=0,
        )

        target['video'] = video

    return target
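# These get_single_item variants are normally reached through __getitem__
# and batched with a stock DataLoader. A hedged consumption sketch:
# SomeVideoDataset and its constructor arguments are placeholders for
# whichever Dataset class wraps the method above.
from torch.utils.data import DataLoader

dataset = SomeVideoDataset(seqlen=16, debug=False)  # hypothetical wrapper class
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)

for target in loader:
    features = target['features']  # (B, seqlen, feat_dim) precomputed CNN features
    kp_2d = target['kp_2d']        # (B, seqlen, 49, 3) normalized joints + confidence
    break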
def get_single_item(self, index):
    start_index, end_index = self.vid_indices[index]

    is_train = self.set == 'train'

    if self.dataset_name == '3dpw':
        kp_2d = convert_kps(self.db['joints2D'][start_index:end_index + 1], src='common', dst='spin')
        kp_3d = self.db['joints3D'][start_index:end_index + 1]
    elif self.dataset_name == 'mpii3d':
        kp_2d = self.db['joints2D'][start_index:end_index + 1]
        if is_train:
            kp_3d = self.db['joints3D'][start_index:end_index + 1]
        else:
            kp_3d = convert_kps(self.db['joints3D'][start_index:end_index + 1], src='spin', dst='common')
    elif self.dataset_name == 'h36m':
        kp_2d = self.db['joints2D'][start_index:end_index + 1]
        if is_train:
            kp_3d = self.db['joints3D'][start_index:end_index + 1]
        else:
            kp_3d = convert_kps(self.db['joints3D'][start_index:end_index + 1], src='spin', dst='common')

    kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)
    nj = 14 if not is_train else 49
    kp_3d_tensor = np.zeros((self.seqlen, nj, 3), dtype=np.float16)

    if self.dataset_name == '3dpw':
        pose = self.db['pose'][start_index:end_index + 1]
        shape = self.db['shape'][start_index:end_index + 1]
        w_smpl = torch.ones(self.seqlen).float()
        w_3d = torch.ones(self.seqlen).float()
    elif self.dataset_name == 'h36m':
        if not is_train:
            pose = np.zeros((kp_2d.shape[0], 72))
            shape = np.zeros((kp_2d.shape[0], 10))
            w_smpl = torch.zeros(self.seqlen).float()
            w_3d = torch.ones(self.seqlen).float()
        else:
            pose = self.db['pose'][start_index:end_index + 1]
            shape = self.db['shape'][start_index:end_index + 1]
            w_smpl = torch.ones(self.seqlen).float()
            w_3d = torch.ones(self.seqlen).float()
    elif self.dataset_name == 'mpii3d':
        pose = np.zeros((kp_2d.shape[0], 72))
        shape = np.zeros((kp_2d.shape[0], 10))
        w_smpl = torch.zeros(self.seqlen).float()
        w_3d = torch.ones(self.seqlen).float()

    bbox = self.db['bbox'][start_index:end_index + 1]
    # copy so that halving the stored width does not also shrink the bbox
    # used for cropping in the loop below
    bbox_orig = bbox.copy()
    bbox_orig[:, 2] = bbox[:, 2] * 0.5

    input = torch.from_numpy(self.db['features'][start_index:end_index + 1]).float()

    theta_tensor = np.zeros((self.seqlen, 85), dtype=np.float16)

    for idx in range(self.seqlen):
        # crop image and transform 2d keypoints
        kp_2d[idx, :, :2], trans = transfrom_keypoints(
            kp_2d=kp_2d[idx, :, :2],
            center_x=bbox[idx, 0],
            center_y=bbox[idx, 1],
            width=bbox[idx, 2],
            height=bbox[idx, 3],
            patch_width=224,
            patch_height=224,
            do_augment=False,
        )

        kp_2d[idx, :, :2] = normalize_2d_kp(kp_2d[idx, :, :2], 224)

        # theta shape (85,): [cam (3) | pose (72) | shape (10)]
        theta = np.concatenate((np.array([1., 0., 0.]), pose[idx], shape[idx]), axis=0)

        kp_2d_tensor[idx] = kp_2d[idx]
        theta_tensor[idx] = theta
        kp_3d_tensor[idx] = kp_3d[idx]

    target = {
        'features': input,
        'theta': torch.from_numpy(theta_tensor).float(),  # camera, pose and shape
        'kp_2d': torch.from_numpy(kp_2d_tensor).float(),  # 2D keypoints transformed according to bbox cropping
        'kp_3d': torch.from_numpy(kp_3d_tensor).float(),  # 3D keypoints
        'w_smpl': w_smpl,
        'w_3d': w_3d,
    }

    if self.dataset_name == 'mpii3d' and not is_train:
        target['valid'] = self.db['valid_i'][start_index:end_index + 1]

    if self.dataset_name == '3dpw' and not is_train:
        vn = self.db['vid_name'][start_index:end_index + 1]
        fi = self.db['frame_id'][start_index:end_index + 1]
        target['instance_id'] = [f'{v}/{f}' for v, f in zip(vn, fi)]

    # if self.dataset_name == '3dpw' and not self.is_train:
    #     target['imgname'] = np.array(self.db['img_name'][start_index:end_index + 1].tolist())
    #     target['center'] = self.db['bbox'][start_index:end_index + 1, :2]
    #     target['valid'] = torch.from_numpy(self.db['valid'][start_index:end_index + 1])

    # if self.debug:  # debug guard commented out; the visualization block below always runs
    # NOTE: the loop below calls get_single_image_full, so import that
    # (assumed to live in img_utils alongside get_single_image_crop)
    from lib.data_utils.img_utils import get_single_image_full

    if self.dataset_name in ('mpii3d', 'h36m'):
        video_names = self.db['img_name'][start_index:end_index + 1]
    else:
        vid_name = self.db['vid_name'][start_index]
        vid_name = '_'.join(vid_name.split('_')[:-1])
        f = osp.join(self.folder, 'imageFiles', vid_name)
        video_file_list = [osp.join(f, x) for x in sorted(os.listdir(f)) if x.endswith('.jpg')]
        frame_idxs = self.db['frame_id'][start_index:end_index + 1]
        video_names = [video_file_list[i] for i in frame_idxs]

    count = 0
    for image_name, tmp_bbox_orig in zip(video_names, bbox_orig):
        image_yolo, image_big, bbox_orig_yolo, bbox_orig_big = get_single_image_full(image_name, tmp_bbox_orig)
        if count == 0:
            bbox_orig_big_all = [bbox_orig_big]
            bbox_orig_yolo_all = [bbox_orig_yolo]
            video_big = image_big.unsqueeze(0)
            video_yolo = image_yolo.unsqueeze(0)
        else:
            bbox_orig_big_all = np.append(bbox_orig_big_all, [bbox_orig_big], axis=0)
            bbox_orig_yolo_all = np.append(bbox_orig_yolo_all, [bbox_orig_yolo], axis=0)
            video_big = torch.cat([video_big, image_big.unsqueeze(0)])
            video_yolo = torch.cat([video_yolo, image_yolo.unsqueeze(0)])
        count += 1

    target['video_big'] = video_big
    target['video_yolo'] = video_yolo
    target['bbox_orig_yolo'] = bbox_orig_yolo_all
    target['bbox_orig_big'] = bbox_orig_big_all

    return target
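# theta packs a weak-perspective camera with SMPL pose and shape into one
# 85-D vector, matching np.concatenate(([1, 0, 0], pose, shape)) above.
# A small unpacking helper; the helper name is ours, the split sizes come
# straight from that concatenation.
import torch

def split_theta(theta):
    cam = theta[..., :3]     # (3,)  weak-perspective [s, tx, ty], initialized to [1, 0, 0]
    pose = theta[..., 3:75]  # (72,) SMPL axis-angle parameters, 24 joints x 3
    shape = theta[..., 75:]  # (10,) SMPL betas
    return cam, pose, shape

cam, pose, shape = split_theta(torch.zeros(16, 85))  # one seqlen=16 sequence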
def get_single_item(self, index):
    curr_key = self.data_keys[index]
    curr_length = self.vid_lengths[curr_key]
    vid_start = self.vid_start[curr_key]

    # sample a random seqlen-frame window inside the current video
    start_index = (torch.randint(curr_length - self.seqlen, (1,)) + vid_start
                   if curr_length - self.seqlen != 0 else vid_start).long()
    end_index = (start_index + self.seqlen - 1).long()

    is_train = self.set == 'train'

    if self.dataset_name == '3dpw' or self.dataset_name == 'amass_rend_take3':
        kp_2d = convert_kps(self.db['joints2D'][start_index:end_index + 1], src='common', dst='spin')
        kp_3d = self.db['joints3D'][start_index:end_index + 1]
    elif self.dataset_name == 'mpii3d':
        kp_2d = self.db['joints2D'][start_index:end_index + 1]
        if is_train:
            kp_3d = self.db['joints3D'][start_index:end_index + 1]
        else:
            kp_3d = convert_kps(self.db['joints3D'][start_index:end_index + 1], src='spin', dst='common')
    elif self.dataset_name == 'h36m':
        kp_2d = self.db['joints2D'][start_index:end_index + 1]
        if is_train:
            kp_3d = self.db['joints3D'][start_index:end_index + 1]
        else:
            kp_3d = convert_kps(self.db['joints3D'][start_index:end_index + 1], src='spin', dst='common')

    kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16)
    nj = 14 if not is_train else 49
    kp_3d_tensor = np.zeros((self.seqlen, nj, 3), dtype=np.float16)

    if self.dataset_name == '3dpw' or self.dataset_name == 'amass_rend_take3':
        pose = self.db['pose'][start_index:end_index + 1]
        shape = self.db['shape'][start_index:end_index + 1]
        w_smpl = torch.ones(self.seqlen).float()
        w_3d = torch.ones(self.seqlen).float()
    elif self.dataset_name == 'h36m':
        if not is_train:
            pose = np.zeros((kp_2d.shape[0], 72))
            shape = np.zeros((kp_2d.shape[0], 10))
            w_smpl = torch.zeros(self.seqlen).float()
            w_3d = torch.ones(self.seqlen).float()
        else:
            pose = self.db['pose'][start_index:end_index + 1]
            shape = self.db['shape'][start_index:end_index + 1]
            w_smpl = torch.ones(self.seqlen).float()
            w_3d = torch.ones(self.seqlen).float()
    elif self.dataset_name == 'mpii3d':
        pose = np.zeros((kp_2d.shape[0], 72))
        shape = np.zeros((kp_2d.shape[0], 10))
        w_smpl = torch.zeros(self.seqlen).float()
        w_3d = torch.ones(self.seqlen).float()

    bbox = self.db['bbox'][start_index:end_index + 1]
    input = torch.from_numpy(self.db['features'][start_index:end_index + 1]).float()

    theta_tensor = np.zeros((self.seqlen, 85), dtype=np.float16)

    for idx in range(self.seqlen):
        # crop image and transform 2d keypoints
        kp_2d[idx, :, :2], trans = transfrom_keypoints(
            kp_2d=kp_2d[idx, :, :2],
            center_x=bbox[idx, 0],
            center_y=bbox[idx, 1],
            width=bbox[idx, 2],
            height=bbox[idx, 3],
            patch_width=224,
            patch_height=224,
            do_augment=False,
        )

        kp_2d[idx, :, :2] = normalize_2d_kp(kp_2d[idx, :, :2], 224)

        # theta shape (85,): [cam (3) | pose (72) | shape (10)]
        theta = np.concatenate((np.array([1., 0., 0.]), pose[idx], shape[idx]), axis=0)

        kp_2d_tensor[idx] = kp_2d[idx]
        theta_tensor[idx] = theta
        kp_3d_tensor[idx] = kp_3d[idx]

    target = {
        'features': input,
        'theta': torch.from_numpy(theta_tensor).float(),  # camera, pose and shape
        'kp_2d': torch.from_numpy(kp_2d_tensor).float(),  # 2D keypoints transformed according to bbox cropping
        'kp_3d': torch.from_numpy(kp_3d_tensor).float(),  # 3D keypoints
        'w_smpl': w_smpl,
        'w_3d': w_3d,
    }

    if self.dataset_name == 'mpii3d' and not is_train:
        target['valid'] = self.db['valid_i'][start_index:end_index + 1]

    if (self.dataset_name == '3dpw' or self.dataset_name == 'amass_rend_take3') and not is_train:
        vn = self.db['vid_name'][start_index:end_index + 1]
        fi = self.db['frame_id'][start_index:end_index + 1]
        target['instance_id'] = [f'{v}/{f}' for v, f in zip(vn, fi)]

    # if self.dataset_name == '3dpw' and not self.is_train:
    #     target['imgname'] = np.array(self.db['img_name'][start_index:end_index + 1].tolist())
    #     target['center'] = self.db['bbox'][start_index:end_index + 1, :2]
    #     target['valid'] = torch.from_numpy(self.db['valid'][start_index:end_index + 1])

    if self.debug:
        from lib.data_utils.img_utils import get_single_image_crop

        if self.dataset_name in ('mpii3d', 'h36m'):
            video = self.db['img_name'][start_index:end_index + 1]
        else:
            vid_name = self.db['vid_name'][start_index]
            vid_name = '_'.join(vid_name.split('_')[:-1])
            f = osp.join(self.folder, 'imageFiles', vid_name)
            video_file_list = [osp.join(f, x) for x in sorted(os.listdir(f)) if x.endswith('.jpg')]
            frame_idxs = self.db['frame_id'][start_index:end_index + 1]
            video = [video_file_list[i] for i in frame_idxs]

        video = torch.cat(
            [get_single_image_crop(image, bbox).unsqueeze(0) for image, bbox in zip(video, bbox)],
            dim=0,
        )

        target['video'] = video

    return target
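# Unlike the vid_indices variant above, this loader draws a fresh random
# seqlen-frame window on every call, so fetching the same index twice can
# return different segments of the same video. A standalone sketch of that
# sampling rule; field names mirror the attributes used above.
import torch

def sample_window(vid_start, curr_length, seqlen):
    # pick a random start offset so the whole window fits in the video;
    # fall back to the first frame when the video is exactly seqlen long
    if curr_length - seqlen != 0:
        start = vid_start + int(torch.randint(curr_length - seqlen, (1,)))
    else:
        start = vid_start
    return start, start + seqlen - 1

start, end = sample_window(vid_start=100, curr_length=40, seqlen=16)
assert 100 <= start and end <= 100 + 40 - 1  # window stays inside the video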