def read_data(amass_data, set, debug=False, max_samples=-1):
    """Build the common dataset dict from AMASS sequences (rendered with SMPL_Renderer)
    and extract pretrained-HMR features."""
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        'valid': [],
    }

    device = (
        torch.device("cuda", index=0)
        if torch.cuda.is_available()
        else torch.device("cpu")
    )

    model = spin.get_pretrained_hmr()
    smpl_renderer = SMPL_Renderer(device=device, image_size=400, camera_mode="look_at")

    for i, (k, v) in tqdm(enumerate(amass_data)):
        vid_name, frame_id, j3d, j2d, shape, pose, bbox, img_name, features, valid = \
            amass_to_dataset(k, v, set=set, smpl_renderer=smpl_renderer)

        if vid_name is not None:
            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
                j2d, vis_thresh=VIS_THRESH, sigma=8)

            # process bbox_params: (c_x, c_y, scale) -> square (c_x, c_y, w, h) with 10% margin
            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]
            w = h = 150. / scale
            w = h = h * 1.1
            bbox = np.vstack([c_x, c_y, w, h]).T

            # print('campose', campose_valid[time_pt1:time_pt2].shape)

            img_paths_array = img_name

            dataset['vid_name'].append(vid_name)
            dataset['frame_id'].append(frame_id)
            dataset['img_name'].append(img_name)
            dataset['joints3D'].append(j3d)
            dataset['joints2D'].append(j2d)
            dataset['shape'].append(shape)
            dataset['pose'].append(pose)
            dataset['bbox'].append(bbox)
            dataset['valid'].append(valid)

            features = extract_features(model, img_paths_array, bbox,
                                        kp_2d=j2d[time_pt1:time_pt2], debug=debug,
                                        dataset='3dpw', scale=1.2)
            dataset['features'].append(features)

        if max_samples != -1 and i > max_samples:
            break

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    # Filter out frames with too few visible keypoints
    indices_to_use = np.where(
        (dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
    for k in dataset.keys():
        dataset[k] = dataset[k][indices_to_use]

    return dataset
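# ---------------------------------------------------------------------------
# Hypothetical usage sketch (an addition, not part of the original script):
# it assumes `amass_data` has already been loaded as an iterable of
# (sequence_name, sequence_dict) pairs and that joblib is available; the
# output file name is a placeholder.
# ---------------------------------------------------------------------------
def _build_amass_db_example(amass_data, out_path='amass_render_db.pt'):
    """Illustrative driver: build the AMASS db and serialize it with joblib."""
    import joblib

    db = read_data(amass_data, set='train', debug=False, max_samples=-1)
    joblib.dump(db, out_path)
    print(f'Saved {len(db["vid_name"])} frames to {out_path}')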
def read_data(folder, set, debug=False):
    """Build the common dataset dict from 3DPW sequence files and extract pretrained-HMR features."""
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        'valid': [],
    }

    model = spin.get_pretrained_hmr()

    if set == 'val':
        set = 'test'

    sequences = [
        x.split('.')[0] for x in os.listdir(osp.join(folder, 'sequenceFiles', set))
    ]

    J_regressor = None
    smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
    if set == 'test':
        J_regressor = torch.from_numpy(
            np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float()

    for i, seq in tqdm(enumerate(sequences)):
        data_file = osp.join(folder, 'sequenceFiles', set, seq + '.pkl')
        data = pkl.load(open(data_file, 'rb'), encoding='latin1')

        img_dir = osp.join(folder, 'imageFiles', seq)

        num_people = len(data['poses'])
        num_frames = len(data['img_frame_ids'])
        assert data['poses2d'][0].shape[0] == num_frames

        for p_id in range(num_people):
            pose = torch.from_numpy(data['poses'][p_id]).float()
            shape = torch.from_numpy(data['betas'][p_id][:10]).float().repeat(pose.size(0), 1)
            trans = torch.from_numpy(data['trans'][p_id]).float()
            j2d = data['poses2d'][p_id].transpose(0, 2, 1)
            cam_pose = data['cam_poses']
            campose_valid = data['campose_valid'][p_id]

            # ======== Align the mesh params ======== #
            # rotate the global orientation into each frame's camera coordinates
            rot = pose[:, :3]
            rot_mat = batch_rodrigues(rot)

            Rc = torch.from_numpy(cam_pose[:, :3, :3]).float()
            Rs = torch.bmm(Rc, rot_mat.reshape(-1, 3, 3))
            rot = rotation_matrix_to_angle_axis(Rs)
            pose[:, :3] = rot
            # ======== Align the mesh params ======== #

            output = smpl(betas=shape, body_pose=pose[:, 3:],
                          global_orient=pose[:, :3], transl=trans)
            # verts = output.vertices
            j3d = output.joints

            if J_regressor is not None:
                vertices = output.vertices
                J_regressor_batch = J_regressor[None, :].expand(
                    vertices.shape[0], -1, -1).to(vertices.device)
                j3d = torch.matmul(J_regressor_batch, vertices)
                j3d = j3d[:, H36M_TO_J14, :]

            img_paths = []
            for i_frame in range(num_frames):
                img_path = os.path.join(img_dir, 'image_{:05d}.jpg'.format(i_frame))
                img_paths.append(img_path)

            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
                j2d, vis_thresh=VIS_THRESH, sigma=8)

            # process bbox_params: (c_x, c_y, scale) -> square (c_x, c_y, w, h) with 10% margin
            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]
            w = h = 150. / scale
            w = h = h * 1.1
            bbox = np.vstack([c_x, c_y, w, h]).T

            # process keypoints
            j2d[:, :, 2] = j2d[:, :, 2] > 0.3  # set the visibility flags
            # Convert to common 2d keypoint format
            perm_idxs = get_perm_idxs('3dpw', 'common')
            perm_idxs += [0, 0]  # no neck, top head
            j2d = j2d[:, perm_idxs]
            j2d[:, 12:, 2] = 0.0  # zero out the two padded joints (neck, top of head)

            # print('j2d', j2d[time_pt1:time_pt2].shape)
            # print('campose', campose_valid[time_pt1:time_pt2].shape)

            img_paths_array = np.array(img_paths)[time_pt1:time_pt2]
            dataset['vid_name'].append(np.array([f'{seq}_{p_id}'] * num_frames)[time_pt1:time_pt2])
            dataset['frame_id'].append(np.arange(0, num_frames)[time_pt1:time_pt2])
            dataset['img_name'].append(img_paths_array)
            dataset['joints3D'].append(j3d.numpy()[time_pt1:time_pt2])
            dataset['joints2D'].append(j2d[time_pt1:time_pt2])
            dataset['shape'].append(shape.numpy()[time_pt1:time_pt2])
            dataset['pose'].append(pose.numpy()[time_pt1:time_pt2])
            dataset['bbox'].append(bbox)
            dataset['valid'].append(campose_valid[time_pt1:time_pt2])

            features = extract_features(model, img_paths_array, bbox,
                                        kp_2d=j2d[time_pt1:time_pt2], debug=debug,
                                        dataset='3dpw', scale=1.2)
            dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    # Filter out frames with too few visible keypoints
    indices_to_use = np.where(
        (dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
    for k in dataset.keys():
        dataset[k] = dataset[k][indices_to_use]

    return dataset
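# ---------------------------------------------------------------------------
# Standalone sketch (an addition) of the bbox conversion used in both
# read_data functions above: the smoothed bbox params are rows of
# (center_x, center_y, scale); the crop is made square with side 150 / scale
# and then enlarged by 10% (the H36M variant below shrinks it by 10% instead).
# The numbers here are dummy values for illustration only.
# ---------------------------------------------------------------------------
def _bbox_params_to_cxcywh_example():
    """Illustrative conversion of (c_x, c_y, scale) rows into (c_x, c_y, w, h) boxes."""
    import numpy as np

    bbox_params = np.array([[640.0, 360.0, 0.75],
                            [642.0, 358.0, 0.80]])  # dummy (c_x, c_y, scale) rows
    c_x, c_y, scale = bbox_params[:, 0], bbox_params[:, 1], bbox_params[:, 2]
    w = h = 150.0 / scale * 1.1           # square crop with a 10% margin
    bbox = np.vstack([c_x, c_y, w, h]).T  # one (c_x, c_y, w, h) row per frame
    return bbox                           # shape (num_frames, 4)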
def read_data_train(dataset_path, set='train', debug=False):
    """Build the common dataset dict from Human3.6M annotations (subsampled to 25 fps)
    and extract pretrained-HMR features."""
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
    }

    # occluders = load_occluders('./data/VOC2012')
    model = spin.get_pretrained_hmr()

    if set == 'train':
        subjects = [1, 5, 6, 7, 8]
    else:
        subjects = [9, 11]

    for subject in subjects:
        annot_path = osp.join(dataset_path, 'annotations')

        # camera load
        with open(osp.join(annot_path, 'Human36M_subject' + str(subject) + '_camera.json'), 'r') as f:
            cameras = json.load(f)
        # joint coordinate load
        with open(osp.join(annot_path, 'Human36M_subject' + str(subject) + '_joint_3d.json'), 'r') as f:
            joints = json.load(f)

        # SMPL parameters obtained by NeuralAnnot will be released (https://arxiv.org/abs/2011.11232) after publication
        # # smpl parameter load
        # with open(osp.join(annot_path, 'Human36M_subject' + str(subject) + '_SMPL_NeuralAnnot.json'), 'r') as f:
        #     smpl_params = json.load(f)

        seq_list = sorted(glob.glob(dataset_path + f'/images/s_{subject:02d}*'))

        for seq in tqdm(seq_list):
            seq_name = seq.split('/')[-1]
            act = str(int(seq_name.split('_act_')[-1][0:2]))
            subact = str(int(seq_name.split('_subact_')[-1][0:2]))
            cam = str(int(seq_name.split('_ca_')[-1][0:2]))
            # if cam != '4':  # front camera (Table 6)
            #     continue
            print("seq name: ", seq)

            img_paths = sorted(glob.glob(seq + '/*.jpg'))
            num_frames = len(img_paths)
            if num_frames < 1:
                continue

            # camera parameters
            cam_param = cameras[cam]
            R = np.array(cam_param['R'], dtype=np.float32)
            t = np.array(cam_param['t'], dtype=np.float32)
            f = np.array(cam_param['f'], dtype=np.float32)
            c = np.array(cam_param['c'], dtype=np.float32)

            # img starts from index 1, and annot starts from index 0
            poses = np.zeros((num_frames, 72), dtype=np.float32)
            shapes = np.zeros((num_frames, 10), dtype=np.float32)
            j3ds = np.zeros((num_frames, 49, 3), dtype=np.float32)
            j2ds = np.zeros((num_frames, 49, 3), dtype=np.float32)

            for img_i in tqdm(range(num_frames)):
                # smpl_param = smpl_params[act][subact][str(img_i)][cam]
                # pose = np.array(smpl_param['pose'], dtype=np.float32)
                # shape = np.array(smpl_param['shape'], dtype=np.float32)

                joint_world = np.array(joints[act][subact][str(img_i)], dtype=np.float32)
                # swap right and left joints
                match = [[1, 4], [2, 5], [3, 6]]
                for m in match:
                    l, r = m
                    joint_world[l], joint_world[r] = joint_world[r].copy(), joint_world[l].copy()
                joint_cam = world2cam(joint_world, R, t)
                joint_img = cam2pixel(joint_cam, f, c)

                j3d = convert_kps(joint_cam[None, :, :] / 1000, "h36m", "spin").reshape((-1, 3))
                j3d = j3d - j3d[39]  # subtract the root (pelvis) joint

                joint_img[:, 2] = 1
                j2d = convert_kps(joint_img[None, :, :], "h36m", "spin").reshape((-1, 3))

                # poses[img_i] = pose
                # shapes[img_i] = shape
                j3ds[img_i] = j3d
                j2ds[img_i] = j2d

                """
                import torch
                smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
                p = torch.from_numpy(pose).float().reshape(1, -1, 3)
                s = torch.from_numpy(shape).float().reshape(1, -1)
                J_regressor = torch.from_numpy(np.load(osp.join(TCMR_DATA_DIR, 'J_regressor_h36m.npy'))).float()
                output = smpl(betas=s, body_pose=p[:, 3:], global_orient=p[:, :3])
                vertices = output.vertices
                J_regressor_batch = J_regressor[None, :].expand(vertices.shape[0], -1, -1).to(vertices.device)
                temp_j3d = torch.matmul(J_regressor_batch, vertices) * 1000
                # temp_j3d = temp_j3d - temp_j3d[:, 0, :]
                temp_j3d = temp_j3d[0, H36M_TO_J14, :]

                gt_j3d = joint_cam - joint_cam[0, :]
                gt_j3d = gt_j3d[H36M_TO_J14, :]

                print("CHECK: ", (temp_j3d - gt_j3d))
                """

            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(
                j2ds, vis_thresh=VIS_THRESH, sigma=8)
            # bbox_params, time_pt1, time_pt2 = get_all_bbox_params(j2ds, vis_thresh=VIS_THRESH)

            """
            img = cv2.imread(img_paths[0])
            temp = draw_skeleton(img, j2ds[0], dataset='spin', unnormalize=False, thickness=2)
            cv2.imshow('img', temp)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
            cv2.waitKey(1)
            """

            # process bbox_params: (c_x, c_y, scale) -> square (c_x, c_y, w, h)
            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]
            w = h = 150. / scale
            w = h = h * 0.9  # 1.1 for h36m_train_25fps_occ_db.pt
            bbox = np.vstack([c_x, c_y, w, h]).T

            # subsample every other frame to 25 fps
            img_paths_array = np.array(img_paths)[time_pt1:time_pt2][::2]
            bbox = bbox[::2]
            dataset['vid_name'].append(np.array([f'{seq}_{subject}'] * num_frames)[time_pt1:time_pt2][::2])
            dataset['frame_id'].append(np.arange(0, num_frames)[time_pt1:time_pt2][::2])
            dataset['joints3D'].append(j3ds[time_pt1:time_pt2][::2])
            dataset['joints2D'].append(j2ds[time_pt1:time_pt2][::2])
            dataset['shape'].append(shapes[time_pt1:time_pt2][::2])
            dataset['pose'].append(poses[time_pt1:time_pt2][::2])
            dataset['img_name'].append(img_paths_array)
            dataset['bbox'].append(bbox)

            features = extract_features(model, None,  # no occluders (cf. the commented-out load_occluders call above)
                                        img_paths_array, bbox,
                                        kp_2d=j2ds[time_pt1:time_pt2][::2],
                                        debug=debug, dataset='h36m',
                                        scale=1.0)  # 1.2 for h36m_train_25fps_occ_db.pt
            dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    return dataset
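# ---------------------------------------------------------------------------
# Hypothetical usage sketch (an addition, not part of the original script):
# the dataset root and the output file name are placeholders; similar
# preprocessing scripts usually expose them through argparse.
# ---------------------------------------------------------------------------
def _build_h36m_db_example(dataset_path, out_path='h36m_train_25fps_db.pt'):
    """Illustrative driver: build the Human3.6M training db and serialize it with joblib."""
    import joblib

    db = read_data_train(dataset_path, set='train', debug=False)
    joblib.dump(db, out_path)
    print(f'Saved {db["joints3D"].shape[0]} frames to {out_path}')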