def read_data(folder):
    dataset = {
        'img_name': [],
        'joints2D': [],
        'bbox': [],
        'vid_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    file_names = sorted(glob.glob(folder + '/labels/' + '*.mat'))

    for fname in tqdm(file_names):
        vid_dict = load_mat(fname)
        imgs = sorted(glob.glob(folder + '/frames/'
                                + fname.strip().split('/')[-1].split('.')[0]
                                + '/*.jpg'))
        kp_2d = np.zeros((vid_dict['nframes'], 13, 3))
        perm_idxs = get_perm_idxs('pennaction', 'common')

        kp_2d[:, :, 0] = vid_dict['x']
        kp_2d[:, :, 1] = vid_dict['y']
        kp_2d[:, :, 2] = vid_dict['visibility']
        kp_2d = kp_2d[:, perm_idxs, :]

        # fix inconsistency: pad the 13 keypoints to the 14-joint common format;
        # slot 12 is left zero and the last keypoint moves to slot 13
        n_kp_2d = np.zeros((kp_2d.shape[0], 14, 3))
        n_kp_2d[:, :12, :] = kp_2d[:, :-1, :]
        n_kp_2d[:, 13, :] = kp_2d[:, 12, :]
        kp_2d = n_kp_2d

        bbox = np.zeros((vid_dict['nframes'], 4))

        for fr_id, fr in enumerate(kp_2d):
            u, d, l, r = calc_kpt_bound(fr)
            center = np.array([(l + r) * 0.5, (u + d) * 0.5], dtype=np.float32)
            c_x, c_y = center[0], center[1]
            w, h = r - l, d - u
            w = h = np.where(w / h > 1, w, h)  # keep the larger side -> square box
            bbox[fr_id, :] = np.array([c_x, c_y, w, h])

        dataset['vid_name'].append(np.array([f'{fname}'] * vid_dict['nframes']))
        dataset['img_name'].append(np.array(imgs))
        dataset['joints2D'].append(kp_2d)
        dataset['bbox'].append(bbox)

        features = extract_features(model, np.array(imgs), bbox, dataset='pennaction', debug=False)
        dataset['features'].append(features)

    for k in dataset.keys():
        # concatenate the per-video lists into flat per-frame arrays; the
        # original's intermediate np.array(...) pass was redundant and fails
        # on ragged lists under recent numpy
        dataset[k] = np.concatenate(dataset[k])

    return dataset
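# A minimal, hedged driver for the PennAction reader above: serializing the
# returned dict with joblib mirrors how these *_db.pt files are typically
# built, but the output filename here is an assumption, not the verified original.
def save_pennaction_db(folder, out_path='pennaction_train_db.pt'):
    import joblib  # local import so the sketch stays self-contained
    db = read_data(folder)
    joblib.dump(db, out_path)  # reload later with joblib.load(out_path)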
def read_data(folder, set, debug=False):
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        'valid': [],
    }

    model = spin.get_pretrained_hmr()

    if set == 'val':
        set = 'test'

    sequences = [x.split('.')[0] for x in os.listdir(osp.join(folder, 'sequenceFiles', set))]

    J_regressor = None

    smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
    if set == 'test':
        J_regressor = torch.from_numpy(np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float()

    for i, seq in tqdm(enumerate(sequences)):
        data_file = osp.join(folder, 'sequenceFiles', set, seq + '.pkl')
        data = pkl.load(open(data_file, 'rb'), encoding='latin1')

        img_dir = osp.join(folder, 'imageFiles', seq)

        num_people = len(data['poses'])
        num_frames = len(data['img_frame_ids'])
        assert data['poses2d'][0].shape[0] == num_frames

        for p_id in range(num_people):
            pose = torch.from_numpy(data['poses'][p_id]).float()
            shape = torch.from_numpy(data['betas'][p_id][:10]).float().repeat(pose.size(0), 1)
            trans = torch.from_numpy(data['trans'][p_id]).float()
            j2d = data['poses2d'][p_id].transpose(0, 2, 1)
            cam_pose = data['cam_poses']
            campose_valid = data['campose_valid'][p_id]

            # ======== Align the mesh params ======== #
            rot = pose[:, :3]
            rot_mat = batch_rodrigues(rot)

            Rc = torch.from_numpy(cam_pose[:, :3, :3]).float()
            Rs = torch.bmm(Rc, rot_mat.reshape(-1, 3, 3))
            rot = rotation_matrix_to_angle_axis(Rs)
            pose[:, :3] = rot
            # ======== Align the mesh params ======== #

            output = smpl(betas=shape, body_pose=pose[:, 3:], global_orient=pose[:, :3], transl=trans)
            # verts = output.vertices
            j3d = output.joints

            if J_regressor is not None:
                vertices = output.vertices
                J_regressor_batch = J_regressor[None, :].expand(vertices.shape[0], -1, -1).to(vertices.device)
                j3d = torch.matmul(J_regressor_batch, vertices)
                j3d = j3d[:, H36M_TO_J14, :]

            img_paths = []
            for i_frame in range(num_frames):
                img_path = os.path.join(img_dir + '/image_{:05d}.jpg'.format(i_frame))
                img_paths.append(img_path)

            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(j2d, vis_thresh=VIS_THRESH, sigma=8)

            # process bbox_params
            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]
            w = h = 150. / scale
            w = h = h * 1.1
            bbox = np.vstack([c_x, c_y, w, h]).T

            # process keypoints
            j2d[:, :, 2] = j2d[:, :, 2] > 0.3  # set the visibility flags

            # Convert to common 2d keypoint format
            perm_idxs = get_perm_idxs('3dpw', 'common')
            perm_idxs += [0, 0]  # no neck, top head
            j2d = j2d[:, perm_idxs]
            j2d[:, 12:, 2] = 0.0

            # print('j2d', j2d[time_pt1:time_pt2].shape)
            # print('campose', campose_valid[time_pt1:time_pt2].shape)

            img_paths_array = np.array(img_paths)[time_pt1:time_pt2]
            dataset['vid_name'].append(np.array([f'{seq}_{p_id}'] * num_frames)[time_pt1:time_pt2])
            dataset['frame_id'].append(np.arange(0, num_frames)[time_pt1:time_pt2])
            dataset['img_name'].append(img_paths_array)
            dataset['joints3D'].append(j3d.numpy()[time_pt1:time_pt2])
            dataset['joints2D'].append(j2d[time_pt1:time_pt2])
            dataset['shape'].append(shape.numpy()[time_pt1:time_pt2])
            dataset['pose'].append(pose.numpy()[time_pt1:time_pt2])
            dataset['bbox'].append(bbox)
            dataset['valid'].append(campose_valid[time_pt1:time_pt2])

            features = extract_features(model, img_paths_array, bbox,
                                        kp_2d=j2d[time_pt1:time_pt2], debug=debug,
                                        dataset='3dpw', scale=1.2)
            dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    # Filter out frames with too few visible keypoints
    indices_to_use = np.where((dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
    for k in dataset.keys():
        dataset[k] = dataset[k][indices_to_use]

    return dataset
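# The "Align the mesh params" block above rotates the SMPL global orientation
# into each camera's frame (R_cam @ R_body) and converts back to axis-angle.
# A minimal single-pose sketch of the same math, with scipy standing in for
# the repo's batch_rodrigues / rotation_matrix_to_angle_axis helpers:
def align_global_orient_single(global_orient_aa, cam_rot_mat):
    """global_orient_aa: (3,) axis-angle; cam_rot_mat: (3, 3) world-to-camera rotation."""
    from scipy.spatial.transform import Rotation as R
    R_body = R.from_rotvec(global_orient_aa).as_matrix()  # axis-angle -> rotation matrix
    R_aligned = cam_rot_mat @ R_body                      # compose with camera rotation
    return R.from_matrix(R_aligned).as_rotvec()           # back to axis-angle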
def read_data_train(dataset_path, debug=False):
    h, w = 2048, 2048
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'bbox': [],
        'img_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    # training data
    user_list = range(1, 9)
    seq_list = range(1, 3)
    vid_list = list(range(3)) + list(range(4, 9))

    # product = product(user_list, seq_list, vid_list)
    # user_i, seq_i, vid_i = product[process_id]

    for user_i in user_list:
        for seq_i in seq_list:
            seq_path = os.path.join(dataset_path, 'S' + str(user_i), 'Seq' + str(seq_i))
            # mat file with annotations
            annot_file = os.path.join(seq_path, 'annot.mat')
            annot2 = sio.loadmat(annot_file)['annot2']
            annot3 = sio.loadmat(annot_file)['annot3']
            # calibration file and camera parameters
            for j, vid_i in enumerate(vid_list):
                # image folder
                imgs_path = os.path.join(seq_path, 'video_' + str(vid_i))
                # per frame
                pattern = os.path.join(imgs_path, '*.jpg')
                img_list = sorted(glob.glob(pattern))
                vid_used_frames = []
                vid_used_joints = []
                vid_used_bbox = []
                vid_segments = []
                vid_uniq_id = "subj" + str(user_i) + '_seq' + str(seq_i) + "_vid" + str(vid_i) + "_seg0"

                for i, img_i in tqdm_enumerate(img_list):
                    # for each image we store the relevant annotations
                    img_name = img_i.split('/')[-1]
                    joints_2d_raw = np.reshape(annot2[vid_i][0][i], (1, 28, 2))
                    joints_2d_raw = np.append(joints_2d_raw, np.ones((1, 28, 1)), axis=2)
                    joints_2d = convert_kps(joints_2d_raw, "mpii3d", "spin").reshape((-1, 3))

                    # visualize = True
                    # if visualize == True and i == 500:
                    #     import matplotlib.pyplot as plt
                    #
                    #     frame = cv2.cvtColor(cv2.imread(img_i), cv2.COLOR_BGR2RGB)
                    #
                    #     for k in range(49):
                    #         kp = joints_2d[k]
                    #
                    #         frame = cv2.circle(
                    #             frame.copy(),
                    #             (int(kp[0]), int(kp[1])),
                    #             thickness=3,
                    #             color=(255, 0, 0),
                    #             radius=5,
                    #         )
                    #
                    #         cv2.putText(frame, f'{k}', (int(kp[0]), int(kp[1]) + 1),
                    #                     cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), thickness=3)
                    #
                    #     plt.imshow(frame)
                    #     plt.show()

                    joints_3d_raw = np.reshape(annot3[vid_i][0][i], (1, 28, 3)) / 1000
                    joints_3d = convert_kps(joints_3d_raw, "mpii3d", "spin").reshape((-1, 3))

                    bbox = get_bbox_from_kp2d(joints_2d[~np.all(joints_2d == 0, axis=1)]).reshape(4)

                    # subtract the root (raw joint 4, which maps to index 39 in the SPIN joint set)
                    joints_3d = joints_3d - joints_3d[39]

                    # check that all joints are visible
                    x_in = np.logical_and(joints_2d[:, 0] < w, joints_2d[:, 0] >= 0)
                    y_in = np.logical_and(joints_2d[:, 1] < h, joints_2d[:, 1] >= 0)
                    ok_pts = np.logical_and(x_in, y_in)
                    if np.sum(ok_pts) < joints_2d.shape[0]:
                        # start a new segment whenever a frame is skipped
                        vid_uniq_id = "_".join(vid_uniq_id.split("_")[:-1]) + "_seg" + \
                                      str(int(dataset['vid_name'][-1].split("_")[-1][3:]) + 1)
                        continue

                    dataset['vid_name'].append(vid_uniq_id)
                    dataset['frame_id'].append(img_name.split(".")[0])
                    dataset['img_name'].append(img_i)
                    dataset['joints2D'].append(joints_2d)
                    dataset['joints3D'].append(joints_3d)
                    dataset['bbox'].append(bbox)
                    vid_segments.append(vid_uniq_id)
                    vid_used_frames.append(img_i)
                    vid_used_joints.append(joints_2d)
                    vid_used_bbox.append(bbox)

                vid_segments = np.array(vid_segments)
                ids = np.zeros((len(set(vid_segments)) + 1))
                ids[-1] = len(vid_used_frames) + 1
                if (np.where(vid_segments[:-1] != vid_segments[1:])[0]).size != 0:
                    ids[1:-1] = (np.where(vid_segments[:-1] != vid_segments[1:])[0]) + 1

                # for i in tqdm(range(len(set(vid_segments)))):
                #     features = extract_features(model, np.array(vid_used_frames)[int(ids[i]):int(ids[i + 1])],
                #                                 vid_used_bbox[int(ids[i]):int(ids[i + 1])],
                #                                 kp_2d=np.array(vid_used_joints)[int(ids[i]):int(ids[i + 1])],
                #                                 dataset='spin', debug=False)
                #     dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.array(dataset[k])
    # dataset['features'] = np.concatenate(dataset['features'])

    return dataset
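# The `ids` bookkeeping above marks the frame indices where one contiguous
# segment ends and the next begins (a new segment starts whenever out-of-view
# frames were skipped). A standalone helper showing the same boundary logic:
def _segment_starts(vid_segments):
    # indices where a new segment begins, mirroring the `ids` computation above
    segs = np.asarray(vid_segments)
    return np.concatenate([[0], np.where(segs[:-1] != segs[1:])[0] + 1])
# e.g. _segment_starts(['seg0', 'seg0', 'seg1', 'seg1', 'seg2']) -> array([0, 2, 4])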
def read_single_record(fname):
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints2D': [],  # should contain openpose keypoints only
        # 'features': [],
        # 'bbox': [],
    }

    model = spin.get_pretrained_hmr()

    sess = tf.Session()

    for vid_idx, serialized_ex in tqdm(enumerate(tf.python_io.tf_record_iterator(fname))):
        example = tf.train.Example()
        example.ParseFromString(serialized_ex)

        N = int(example.features.feature['meta/N'].int64_list.value[0])
        # print(fname, vid_idx, N)

        # This is a list of length N
        images_data = example.features.feature['image/encoded'].bytes_list.value

        xys = example.features.feature['image/xys'].float_list.value
        xys = np.array(xys).reshape(-1, 2, 14)

        face_pts = example.features.feature['image/face_pts'].float_list.value
        face_pts = np.array(face_pts).reshape(-1, 3, 5)

        toe_pts = example.features.feature['image/toe_pts'].float_list.value

        if len(toe_pts) == 0:
            # np.zeros takes a shape tuple; the original passed three separate args
            toe_pts = np.zeros((xys.shape[0], 3, 6))

        toe_pts = np.array(toe_pts).reshape(-1, 3, 6)

        visibles = example.features.feature['image/visibilities'].int64_list.value
        visibles = np.array(visibles).reshape(-1, 1, 14)

        video = []
        kp_2d = []

        for i in range(N):
            image = np.expand_dims(sess.run(tf.image.decode_jpeg(images_data[i], channels=3)), axis=0)
            video.append(image)

            kp = np.vstack((xys[i], visibles[i]))
            faces = face_pts[i]
            toes = toe_pts[i]

            kp = np.hstack((kp, faces, toes))

            if 'image/phis' in example.features.feature.keys():
                # Preprocessed, so kps are in [-1, 1]
                img_shape = 224  # image.shape[0]
                vis = kp[2, :]
                kp = ((kp[:2, :] + 1) * 0.5) * img_shape
                kp = np.vstack((kp, vis))

            kp_2d.append(np.expand_dims(kp.T, axis=0))

        video = np.concatenate(video, axis=0)
        kp_2d = np.concatenate(kp_2d, axis=0)

        vid_name = f'{fname}-{vid_idx}'
        frame_id = np.arange(N)
        joints2D = kp_2d

        dataset['vid_name'].append(np.array([vid_name] * N))
        dataset['frame_id'].append(frame_id)
        dataset['joints2D'].append(joints2D)
        # dataset['video'].append(video)

        # features = extract_features(model, video, bbox=None, kp_2d=kp_2d, dataset='insta', debug=False)
        # dataset['features'].append(features)
        # # print(features.shape)
        # assert features.shape[0] == N

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])

    for k, v in dataset.items():
        print(k, len(v))

    return dataset
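# In read_single_record, keypoints of preprocessed records arrive normalized to
# [-1, 1] and are mapped back to pixel coordinates of a 224x224 crop (the crop
# size is hard-coded above). The mapping is a plain affine rescale:
def denormalize_kp(kp_norm, img_size=224):
    # kp_norm: (2, K) array with x, y in [-1, 1] -> pixel coordinates in [0, img_size]
    return ((kp_norm + 1) * 0.5) * img_size
# e.g. denormalize_kp(np.array([[-1., 0., 1.]])) -> [[0., 112., 224.]]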
def read_test_data(dataset_path):
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        'valid_i': [],
    }

    model = spin.get_pretrained_hmr()

    user_list = range(1, 7)

    for user_i in user_list:
        print('Subject', user_i)
        seq_path = os.path.join(dataset_path, 'mpi_inf_3dhp_test_set', 'TS' + str(user_i))
        # mat file with annotations
        annot_file = os.path.join(seq_path, 'annot_data.mat')
        mat_as_h5 = h5py.File(annot_file, 'r')
        annot2 = np.array(mat_as_h5['annot2'])
        annot3 = np.array(mat_as_h5['univ_annot3'])
        valid = np.array(mat_as_h5['valid_frame'])

        vid_used_frames = []
        vid_used_joints = []
        vid_used_bbox = []
        vid_segments = []
        vid_uniq_id = "subj" + str(user_i) + "_seg0"

        for frame_i, valid_i in tqdm(enumerate(valid)):
            img_i = os.path.join('mpi_inf_3dhp_test_set', 'TS' + str(user_i), 'imageSequence',
                                 'img_' + str(frame_i + 1).zfill(6) + '.jpg')

            joints_2d_raw = np.expand_dims(annot2[frame_i, 0, :, :], axis=0)
            joints_2d_raw = np.append(joints_2d_raw, np.ones((1, 17, 1)), axis=2)
            joints_2d = convert_kps(joints_2d_raw, src="mpii3d_test", dst="spin").reshape((-1, 3))

            # visualize = True
            # if visualize == True:
            #     import matplotlib.pyplot as plt
            #
            #     frame = cv2.cvtColor(cv2.imread(os.path.join(dataset_path, img_i)), cv2.COLOR_BGR2RGB)
            #
            #     for k in range(49):
            #         kp = joints_2d[k]
            #
            #         frame = cv2.circle(
            #             frame.copy(),
            #             (int(kp[0]), int(kp[1])),
            #             thickness=3,
            #             color=(255, 0, 0),
            #             radius=5,
            #         )
            #
            #         cv2.putText(frame, f'{k}', (int(kp[0]), int(kp[1]) + 1),
            #                     cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), thickness=3)
            #
            #     plt.imshow(frame)
            #     plt.show()

            joints_3d_raw = np.reshape(annot3[frame_i, 0, :, :], (1, 17, 3)) / 1000
            joints_3d = convert_kps(joints_3d_raw, "mpii3d_test", "spin").reshape((-1, 3))
            # subtract the pelvis (raw joint 0 is the root in the test set;
            # it maps to index 39 in the SPIN joint set)
            joints_3d = joints_3d - joints_3d[39]

            bbox = get_bbox_from_kp2d(joints_2d[~np.all(joints_2d == 0, axis=1)]).reshape(4)

            # check that all joints are visible
            img_file = os.path.join(dataset_path, img_i)
            I = cv2.imread(img_file)
            h, w, _ = I.shape
            x_in = np.logical_and(joints_2d[:, 0] < w, joints_2d[:, 0] >= 0)
            y_in = np.logical_and(joints_2d[:, 1] < h, joints_2d[:, 1] >= 0)
            ok_pts = np.logical_and(x_in, y_in)
            if np.sum(ok_pts) < joints_2d.shape[0]:
                # start a new segment whenever a frame is skipped
                vid_uniq_id = "_".join(vid_uniq_id.split("_")[:-1]) + "_seg" + \
                              str(int(dataset['vid_name'][-1].split("_")[-1][3:]) + 1)
                continue

            dataset['vid_name'].append(vid_uniq_id)
            dataset['frame_id'].append(img_file.split("/")[-1].split(".")[0])
            dataset['img_name'].append(img_file)
            dataset['joints2D'].append(joints_2d)
            dataset['joints3D'].append(joints_3d)
            dataset['bbox'].append(bbox)
            dataset['valid_i'].append(valid_i)

            vid_segments.append(vid_uniq_id)
            vid_used_frames.append(img_file)
            vid_used_joints.append(joints_2d)
            vid_used_bbox.append(bbox)

        vid_segments = np.array(vid_segments)
        ids = np.zeros((len(set(vid_segments)) + 1))
        ids[-1] = len(vid_used_frames) + 1
        if (np.where(vid_segments[:-1] != vid_segments[1:])[0]).size != 0:
            ids[1:-1] = (np.where(vid_segments[:-1] != vid_segments[1:])[0]) + 1

        # for i in tqdm(range(len(set(vid_segments)))):
        #     features = extract_features(model, np.array(vid_used_frames)[int(ids[i]):int(ids[i + 1])],
        #                                 vid_used_bbox[int(ids[i]):int(ids[i + 1])],
        #                                 kp_2d=np.array(vid_used_joints)[int(ids[i]):int(ids[i + 1])],
        #                                 dataset='spin', debug=False)
        #     dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.array(dataset[k])
    # dataset['features'] = np.concatenate(dataset['features'])

    return dataset
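# get_bbox_from_kp2d is a repo helper; conceptually it fits a center/size box
# around the valid keypoints. A plausible minimal version for reference (any
# padding or squaring the real helper applies is not reproduced here):
def bbox_from_kp2d_minimal(kp):
    # kp: (K, >=2) array of pixel keypoints
    ul, lr = kp[:, :2].min(axis=0), kp[:, :2].max(axis=0)  # upper-left, lower-right
    w, h = lr - ul
    return np.array([ul[0] + w / 2, ul[1] + h / 2, w, h])  # (c_x, c_y, w, h)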
def read_data(folder, set):
    dataset = {
        'img_name': [],
        'joints2D': [],
        'bbox': [],
        'vid_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    file_names = glob.glob(osp.join(folder, 'posetrack_data/annotations/', f'{set}/*.json'))
    file_names = sorted(file_names)
    nn_corrupted = 0
    tot_frames = 0
    min_frame_number = 8

    for fid, fname in tqdm_enumerate(file_names):
        if fname == osp.join(folder, 'annotations/train/021133_mpii_train.json'):
            continue

        with open(fname, 'r') as entry:
            anns = json.load(entry)
        # num_frames = anns['images'][0]['nframes']
        anns['images'] = [item for item in anns['images'] if item['is_labeled']]
        num_frames = len(anns['images'])
        frame2imgname = dict()
        for el in anns['images']:
            frame2imgname[el['frame_id']] = el['file_name']

        num_people = -1
        for x in anns['annotations']:
            if num_people < x['track_id']:
                num_people = x['track_id']
        num_people += 1

        posetrack_joints = get_posetrack_original_kp_names()
        idxs = [anns['categories'][0]['keypoints'].index(h)
                for h in posetrack_joints
                if h in anns['categories'][0]['keypoints']]
        for x in anns['annotations']:
            kps = np.array(x['keypoints']).reshape((17, 3))
            kps = kps[idxs, :]
            x['keypoints'] = list(kps.flatten())

        tot_frames += num_people * num_frames
        for p_id in range(num_people):
            annot_pid = [(item['keypoints'], item['bbox'], item['image_id'])
                         for item in anns['annotations']
                         if item['track_id'] == p_id and not (np.count_nonzero(item['keypoints']) == 0)]

            if len(annot_pid) < min_frame_number:
                nn_corrupted += len(annot_pid)
                continue

            bbox = np.zeros((len(annot_pid), 4))
            # perm_idxs = get_perm_idxs('posetrack', 'common')
            kp_2d = np.zeros((len(annot_pid), len(annot_pid[0][0]) // 3, 3))
            img_paths = np.zeros((len(annot_pid)))

            for i, (key2djnts, bbox_p, image_id) in enumerate(annot_pid):
                if (bbox_p[2] == 0 or bbox_p[3] == 0):
                    nn_corrupted += 1
                    continue

                img_paths[i] = image_id
                key2djnts[2::3] = len(key2djnts[2::3]) * [1]

                kp_2d[i, :] = np.array(key2djnts).reshape(int(len(key2djnts) / 3), 3)  # [perm_idxs, :]
                for kp_loc in kp_2d[i, :]:
                    if kp_loc[0] == 0 and kp_loc[1] == 0:
                        kp_loc[2] = 0

                # convert top-left (x, y, w, h) to a center-based square box
                x_tl = bbox_p[0]
                y_tl = bbox_p[1]
                w = bbox_p[2]
                h = bbox_p[3]
                bbox_p[0] = x_tl + w / 2
                bbox_p[1] = y_tl + h / 2
                # w = h = np.where(w / h > 1, w, h)
                w = h = h * 0.8
                bbox_p[2] = w
                bbox_p[3] = h
                bbox[i, :] = bbox_p

            img_paths = list(img_paths)
            img_paths = [osp.join(folder, frame2imgname[item]) if item != 0 else 0 for item in img_paths]

            bbx_idxs = []
            for bbx_id, bbx in enumerate(bbox):
                if np.count_nonzero(bbx) == 0:
                    bbx_idxs += [bbx_id]

            kp_2d = np.delete(kp_2d, bbx_idxs, 0)
            img_paths = np.delete(np.array(img_paths), bbx_idxs, 0)
            bbox = np.delete(bbox, np.where(~bbox.any(axis=1))[0], axis=0)

            # Convert to common 2d keypoint format
            if bbox.size == 0 or bbox.shape[0] < min_frame_number:
                nn_corrupted += 1
                continue

            kp_2d = convert_kps(kp_2d, src='posetrack', dst='spin')

            dataset['vid_name'].append(np.array([f'{fname}_{p_id}'] * img_paths.shape[0]))
            dataset['img_name'].append(np.array(img_paths))
            dataset['joints2D'].append(kp_2d)
            dataset['bbox'].append(np.array(bbox))

            # compute features
            features = extract_features(
                model,
                np.array(img_paths),
                bbox,
                kp_2d=kp_2d,
                dataset='spin',
                debug=False,
            )

            assert kp_2d.shape[0] == img_paths.shape[0] == bbox.shape[0]
            dataset['features'].append(features)

    print(nn_corrupted, tot_frames)

    for k in dataset.keys():
        # concatenate the per-track lists into flat per-frame arrays; the
        # original's intermediate np.array(...) pass was redundant and fails
        # on ragged lists under recent numpy
        dataset[k] = np.concatenate(dataset[k])
    for k, v in dataset.items():
        print(k, v.shape)

    return dataset
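# The loop above converts PoseTrack's top-left (x_tl, y_tl, w, h) boxes into
# the center-based square format used throughout these readers, with side
# 0.8 * h. A standalone version of that conversion:
def tlwh_to_square_center(bbox_tlwh, side_scale=0.8):
    x_tl, y_tl, w, h = bbox_tlwh
    side = h * side_scale
    return np.array([x_tl + w / 2, y_tl + h / 2, side, side])
# e.g. tlwh_to_square_center([10., 20., 50., 100.]) -> [35., 70., 80., 80.]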
def read_data(amass_data, set, debug=False, max_samples=-1):
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
        'valid': [],
    }

    device = (
        torch.device("cuda", index=0)
        if torch.cuda.is_available()
        else torch.device("cpu")
    )

    model = spin.get_pretrained_hmr()
    smpl_renderer = SMPL_Renderer(device=device, image_size=400, camera_mode="look_at")

    for i, (k, v) in tqdm(enumerate(amass_data)):
        vid_name, frame_id, j3d, j2d, shape, pose, bbox, img_name, features, valid = \
            amass_to_dataset(k, v, set=set, smpl_renderer=smpl_renderer)
        if vid_name is not None:
            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(j2d, vis_thresh=VIS_THRESH, sigma=8)

            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]

            w = h = 150. / scale
            w = h = h * 1.1
            bbox = np.vstack([c_x, c_y, w, h]).T

            # print('campose', campose_valid[time_pt1:time_pt2].shape)

            img_paths_array = img_name
            dataset['vid_name'].append(vid_name)
            dataset['frame_id'].append(frame_id)
            dataset['img_name'].append(img_name)
            dataset['joints3D'].append(j3d)
            dataset['joints2D'].append(j2d)
            dataset['shape'].append(shape)
            dataset['pose'].append(pose)
            dataset['bbox'].append(bbox)
            dataset['valid'].append(valid)

            features = extract_features(model, img_paths_array, bbox,
                                        kp_2d=j2d[time_pt1:time_pt2], debug=debug,
                                        dataset='3dpw', scale=1.2)
            dataset['features'].append(features)

        if max_samples != -1 and i > max_samples:
            break

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    # Filter out frames with too few visible keypoints
    indices_to_use = np.where((dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0]
    for k in dataset.keys():
        dataset[k] = dataset[k][indices_to_use]

    return dataset
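# Both this AMASS reader and the 3DPW reader turn get_smooth_bbox_params
# output (c_x, c_y, scale) into square pixel boxes via w = h = 1.1 * 150 / scale.
# A standalone version of that conversion (150 and 1.1 copied from above):
def bbox_params_to_bbox(bbox_params, base_size=150., inflate=1.1):
    c_x, c_y, scale = bbox_params[:, 0], bbox_params[:, 1], bbox_params[:, 2]
    side = inflate * base_size / scale
    return np.vstack([c_x, c_y, side, side]).T  # (N, 4) rows of (c_x, c_y, w, h)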
def read_data_train(dataset_path, set='train', debug=False):
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'shape': [],
        'pose': [],
        'bbox': [],
        'img_name': [],
        'features': [],
    }

    # occluders = load_occluders('./data/VOC2012')

    model = spin.get_pretrained_hmr()

    if set == 'train':
        subjects = [1, 5, 6, 7, 8]
    else:
        subjects = [9, 11]

    for subject in subjects:
        annot_path = osp.join(dataset_path, 'annotations')
        # camera load
        with open(osp.join(annot_path, 'Human36M_subject' + str(subject) + '_camera.json'), 'r') as f:
            cameras = json.load(f)
        # joint coordinate load
        with open(osp.join(annot_path, 'Human36M_subject' + str(subject) + '_joint_3d.json'), 'r') as f:
            joints = json.load(f)

        # SMPL parameters obtained by NeuralAnnot will be released (https://arxiv.org/abs/2011.11232) after publication
        # # smpl parameter load
        # with open(osp.join(annot_path, 'Human36M_subject' + str(subject) + '_SMPL_NeuralAnnot.json'), 'r') as f:
        #     smpl_params = json.load(f)

        seq_list = sorted(glob.glob(dataset_path + f'/images/s_{subject:02d}*'))
        for seq in tqdm(seq_list):
            seq_name = seq.split('/')[-1]
            act = str(int(seq_name.split('_act_')[-1][0:2]))
            subact = str(int(seq_name.split('_subact_')[-1][0:2]))
            cam = str(int(seq_name.split('_ca_')[-1][0:2]))
            # if cam != '4':  # front camera (Table 6)
            #     continue
            print("seq name: ", seq)

            img_paths = sorted(glob.glob(seq + '/*.jpg'))
            num_frames = len(img_paths)
            if num_frames < 1:
                continue

            # camera parameters
            cam_param = cameras[cam]
            R = np.array(cam_param['R'], dtype=np.float32)
            t = np.array(cam_param['t'], dtype=np.float32)
            f = np.array(cam_param['f'], dtype=np.float32)
            c = np.array(cam_param['c'], dtype=np.float32)

            # img starts from index 1, and annot starts from index 0
            poses = np.zeros((num_frames, 72), dtype=np.float32)
            shapes = np.zeros((num_frames, 10), dtype=np.float32)
            j3ds = np.zeros((num_frames, 49, 3), dtype=np.float32)
            j2ds = np.zeros((num_frames, 49, 3), dtype=np.float32)

            for img_i in tqdm(range(num_frames)):
                # smpl_param = smpl_params[act][subact][str(img_i)][cam]
                # pose = np.array(smpl_param['pose'], dtype=np.float32)
                # shape = np.array(smpl_param['shape'], dtype=np.float32)

                joint_world = np.array(joints[act][subact][str(img_i)], dtype=np.float32)
                # match right, left
                match = [[1, 4], [2, 5], [3, 6]]
                for m in match:
                    l, r = m
                    joint_world[l], joint_world[r] = joint_world[r].copy(), joint_world[l].copy()
                joint_cam = world2cam(joint_world, R, t)
                joint_img = cam2pixel(joint_cam, f, c)

                j3d = convert_kps(joint_cam[None, :, :] / 1000, "h36m", "spin").reshape((-1, 3))
                # subtract the root (pelvis; index 39 in the SPIN joint set)
                j3d = j3d - j3d[39]

                joint_img[:, 2] = 1
                j2d = convert_kps(joint_img[None, :, :], "h36m", "spin").reshape((-1, 3))

                # poses[img_i] = pose
                # shapes[img_i] = shape
                j3ds[img_i] = j3d
                j2ds[img_i] = j2d

                """
                import torch
                smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False)
                p = torch.from_numpy(pose).float().reshape(1, -1, 3)
                s = torch.from_numpy(shape).float().reshape(1, -1)
                J_regressor = torch.from_numpy(np.load(osp.join(TCMR_DATA_DIR, 'J_regressor_h36m.npy'))).float()
                output = smpl(betas=s, body_pose=p[:, 3:], global_orient=p[:, :3])
                vertices = output.vertices
                J_regressor_batch = J_regressor[None, :].expand(vertices.shape[0], -1, -1).to(vertices.device)
                temp_j3d = torch.matmul(J_regressor_batch, vertices) * 1000
                # temp_j3d = temp_j3d - temp_j3d[:, 0, :]
                temp_j3d = temp_j3d[0, H36M_TO_J14, :]

                gt_j3d = joint_cam - joint_cam[0, :]
                gt_j3d = gt_j3d[H36M_TO_J14, :]

                print("CHECK: ", (temp_j3d - gt_j3d))
                """

            bbox_params, time_pt1, time_pt2 = get_smooth_bbox_params(j2ds, vis_thresh=VIS_THRESH, sigma=8)
            # bbox_params, time_pt1, time_pt2 = get_all_bbox_params(j2ds, vis_thresh=VIS_THRESH)

            """
            img = cv2.imread(img_paths[0])
            temp = draw_skeleton(img, j2ds[0], dataset='spin', unnormalize=False, thickness=2)
            cv2.imshow('img', temp)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
            cv2.waitKey(1)
            """

            # process bbox_params
            c_x = bbox_params[:, 0]
            c_y = bbox_params[:, 1]
            scale = bbox_params[:, 2]

            w = h = 150. / scale
            w = h = h * 0.9  # 1.1 for h36m_train_25fps_occ_db.pt
            bbox = np.vstack([c_x, c_y, w, h]).T

            # subsample frames to 25 fps
            img_paths_array = np.array(img_paths)[time_pt1:time_pt2][::2]
            bbox = bbox[::2]

            dataset['vid_name'].append(np.array([f'{seq}_{subject}'] * num_frames)[time_pt1:time_pt2][::2])
            dataset['frame_id'].append(np.arange(0, num_frames)[time_pt1:time_pt2][::2])
            dataset['joints3D'].append(j3ds[time_pt1:time_pt2][::2])
            dataset['joints2D'].append(j2ds[time_pt1:time_pt2][::2])
            dataset['shape'].append(shapes[time_pt1:time_pt2][::2])
            dataset['pose'].append(poses[time_pt1:time_pt2][::2])
            dataset['img_name'].append(img_paths_array)
            dataset['bbox'].append(bbox)

            features = extract_features(
                model,
                None,  # occluders disabled (see the commented load_occluders above)
                img_paths_array,
                bbox,
                kp_2d=j2ds[time_pt1:time_pt2][::2],
                debug=debug,
                dataset='h36m',
                scale=1.0)  # 1.2 for h36m_train_25fps_occ_db.pt
            dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
        print(k, dataset[k].shape)

    return dataset
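# world2cam / cam2pixel above are the standard extrinsic transform plus pinhole
# projection. A minimal numpy sketch under the convention commonly used by
# Human3.6M-style toolkits (cam = R @ world + t); the exact conventions of the
# repo's own helpers are an assumption:
def world2cam_np(pts_world, R, t):
    # (N, 3) world coordinates -> camera coordinates
    return pts_world @ R.T + t.reshape(1, 3)

def cam2pixel_np(pts_cam, f, c):
    # perspective divide, then focal scaling and principal-point offset
    x = pts_cam[:, 0] / pts_cam[:, 2] * f[0] + c[0]
    y = pts_cam[:, 1] / pts_cam[:, 2] * f[1] + c[1]
    return np.stack([x, y, pts_cam[:, 2]], axis=1)  # keep depth in the 3rd column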
def read_data(folder):
    dataset = {
        'img_name': [],
        'joints2D': [],
        'bbox': [],
        'bbox_orig': [],
        'vid_name': [],
        'features': [],
    }

    model = spin.get_pretrained_hmr()

    file_names = sorted(glob.glob(folder + '/labels/' + '*.mat'))

    for fname in tqdm(file_names):
        vid_dict = load_mat(fname)
        imgs = sorted(glob.glob(folder + '/frames/'
                                + fname.strip().split('/')[-1].split('.')[0]
                                + '/*.jpg'))
        kp_2d = np.zeros((vid_dict['nframes'], 13, 3))
        perm_idxs = get_perm_idxs('pennaction', 'common')

        kp_2d[:, :, 0] = vid_dict['x']
        kp_2d[:, :, 1] = vid_dict['y']
        kp_2d[:, :, 2] = vid_dict['visibility']
        kp_2d = kp_2d[:, perm_idxs, :]

        # fix inconsistency: pad the 13 keypoints to the 14-joint common format;
        # slot 12 is left zero and the last keypoint moves to slot 13
        n_kp_2d = np.zeros((kp_2d.shape[0], 14, 3))
        n_kp_2d[:, :12, :] = kp_2d[:, :-1, :]
        n_kp_2d[:, 13, :] = kp_2d[:, 12, :]
        kp_2d = n_kp_2d

        bbox = np.zeros((vid_dict['nframes'], 4))
        bbox_orig = np.zeros((vid_dict['nframes'], 4))

        for fr_id, fr in enumerate(kp_2d):
            u, d, l, r = calc_kpt_bound(fr)
            center = np.array([(l + r) * 0.5, (u + d) * 0.5], dtype=np.float32)
            c_x, c_y = center[0], center[1]
            w, h = r - l, d - u
            h *= 1.1
            bbox_orig[fr_id, :] = np.array([c_x, c_y, h * 0.5, h])
            w = h = np.where(w / h > 1, w, h)  # keep the larger side -> square box
            bbox[fr_id, :] = np.array([c_x, c_y, w, h])

            # if True:
            #     tmpimgname = imgs[fr_id]
            #     import matplotlib.pyplot as plt
            #     import matplotlib.patches as patches
            #     fig, ax = plt.subplots()
            #     tmpimg = plt.imread(tmpimgname)
            #     ax.imshow(tmpimg)
            #     rect = patches.Rectangle((bbox_orig[fr_id, 0] - bbox_orig[fr_id, 2] / 2,
            #                               bbox_orig[fr_id, 1] - bbox_orig[fr_id, 3] / 2),
            #                              bbox_orig[fr_id, 2], bbox_orig[fr_id, 3],
            #                              linewidth=2, edgecolor='r', facecolor='none')
            #     ax.add_patch(rect)
            #     rect = patches.Rectangle((bbox[fr_id, 0] - bbox[fr_id, 2] / 2,
            #                               bbox[fr_id, 1] - bbox[fr_id, 3] / 2),
            #                              bbox[fr_id, 2], bbox[fr_id, 3],
            #                              linewidth=2, edgecolor='g', facecolor='none')
            #     ax.add_patch(rect)
            #     plt.show()
            #     print('vis')

        dataset['vid_name'].append(np.array([f'{fname}'] * vid_dict['nframes']))
        dataset['img_name'].append(np.array(imgs))
        dataset['joints2D'].append(kp_2d)
        dataset['bbox'].append(bbox)
        dataset['bbox_orig'].append(bbox_orig)

        features = extract_features(model, np.array(imgs), bbox, dataset='pennaction', debug=False)
        dataset['features'].append(features)

    for k in dataset.keys():
        # concatenate the per-video lists into flat per-frame arrays; the
        # original's intermediate np.array(...) pass was redundant and fails
        # on ragged lists under recent numpy
        dataset[k] = np.concatenate(dataset[k])

    return dataset
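# Both PennAction readers square the keypoint-bound box by keeping the larger
# of width/height via np.where. A standalone scalar version of that step
# (without the extra 1.1 height inflation the reader above applies first):
def square_bbox_from_bound(u, d, l, r):
    c_x, c_y = (l + r) * 0.5, (u + d) * 0.5
    w, h = r - l, d - u
    side = max(w, h)  # equivalent to np.where(w / h > 1, w, h) for scalars
    return np.array([c_x, c_y, side, side])
# e.g. square_bbox_from_bound(40., 200., 30., 110.) -> [70., 120., 160., 160.]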