# The readers below expect the usual preprocessing helpers from the surrounding
# project to be imported at module level (os, os.path as osp, glob, json,
# numpy as np, scipy.io as sio, plus spin, convert_kps, get_bbox_from_kp2d,
# get_posetrack_original_kp_names, extract_features and tqdm_enumerate).
def read_data_train(dataset_path, debug=False):
    """Collect per-frame image paths, 2D/3D joints and bounding boxes from the
    MPI-INF-3DHP training sequences into a flat dictionary. `debug` is
    currently unused."""
    h, w = 2048, 2048
    dataset = {
        'vid_name': [],
        'frame_id': [],
        'joints3D': [],
        'joints2D': [],
        'bbox': [],
        'img_name': [],
        'features': [],
    }

    # pretrained SPIN/HMR backbone; only needed by the per-segment feature
    # extraction below, which is currently commented out
    model = spin.get_pretrained_hmr()

    # training data
    user_list = range(1, 9)
    seq_list = range(1, 3)
    vid_list = list(range(3)) + list(range(4, 9))

    # product = product(user_list, seq_list, vid_list)
    # user_i, seq_i, vid_i = product[process_id]

    for user_i in user_list:
        for seq_i in seq_list:
            seq_path = os.path.join(dataset_path, 'S' + str(user_i), 'Seq' + str(seq_i))
            # mat file with annotations
            annot_file = os.path.join(seq_path, 'annot.mat')
            annot2 = sio.loadmat(annot_file)['annot2']
            annot3 = sio.loadmat(annot_file)['annot3']
            # calibration file and camera parameters
            for j, vid_i in enumerate(vid_list):
                # image folder
                imgs_path = os.path.join(seq_path, 'video_' + str(vid_i))
                # per frame
                pattern = os.path.join(imgs_path, '*.jpg')
                img_list = sorted(glob.glob(pattern))

                vid_used_frames = []
                vid_used_joints = []
                vid_used_bbox = []
                vid_segments = []
                vid_uniq_id = "subj" + str(user_i) + '_seq' + str(seq_i) + \
                              "_vid" + str(vid_i) + "_seg0"

                for i, img_i in tqdm_enumerate(img_list):
                    # for each image we store the relevant annotations
                    img_name = img_i.split('/')[-1]

                    joints_2d_raw = np.reshape(annot2[vid_i][0][i], (1, 28, 2))
                    joints_2d_raw = np.append(joints_2d_raw, np.ones((1, 28, 1)), axis=2)
                    joints_2d = convert_kps(joints_2d_raw, "mpii3d", "spin").reshape((-1, 3))

                    # visualize = True
                    # if visualize == True and i == 500:
                    #     import matplotlib.pyplot as plt
                    #
                    #     frame = cv2.cvtColor(cv2.imread(img_i), cv2.COLOR_BGR2RGB)
                    #
                    #     for k in range(49):
                    #         kp = joints_2d[k]
                    #
                    #         frame = cv2.circle(
                    #             frame.copy(),
                    #             (int(kp[0]), int(kp[1])),
                    #             thickness=3,
                    #             color=(255, 0, 0),
                    #             radius=5,
                    #         )
                    #
                    #         cv2.putText(frame, f'{k}', (int(kp[0]), int(kp[1]) + 1),
                    #                     cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), thickness=3)
                    #
                    #     plt.imshow(frame)
                    #     plt.show()

                    joints_3d_raw = np.reshape(annot3[vid_i][0][i], (1, 28, 3)) / 1000
                    joints_3d = convert_kps(joints_3d_raw, "mpii3d", "spin").reshape((-1, 3))

                    bbox = get_bbox_from_kp2d(
                        joints_2d[~np.all(joints_2d == 0, axis=1)]).reshape(4)

                    # center the 3D joints on the root (pelvis) joint,
                    # index 39 in the 49-joint SPIN layout
                    joints_3d = joints_3d - joints_3d[39]

                    # check that all joints are visible
                    x_in = np.logical_and(joints_2d[:, 0] < w, joints_2d[:, 0] >= 0)
                    y_in = np.logical_and(joints_2d[:, 1] < h, joints_2d[:, 1] >= 0)
                    ok_pts = np.logical_and(x_in, y_in)
                    if np.sum(ok_pts) < joints_2d.shape[0]:
                        # at least one joint falls outside the frame:
                        # start a new "_segN" segment and skip this frame
                        vid_uniq_id = "_".join(vid_uniq_id.split("_")[:-1]) + "_seg" + \
                                      str(int(dataset['vid_name'][-1].split("_")[-1][3:]) + 1)
                        continue

                    dataset['vid_name'].append(vid_uniq_id)
                    dataset['frame_id'].append(img_name.split(".")[0])
                    dataset['img_name'].append(img_i)
                    dataset['joints2D'].append(joints_2d)
                    dataset['joints3D'].append(joints_3d)
                    dataset['bbox'].append(bbox)
                    vid_segments.append(vid_uniq_id)
                    vid_used_frames.append(img_i)
                    vid_used_joints.append(joints_2d)
                    vid_used_bbox.append(bbox)

                # start indices of each contiguous segment, plus a final sentinel
                vid_segments = np.array(vid_segments)
                ids = np.zeros((len(set(vid_segments)) + 1))
                ids[-1] = len(vid_used_frames) + 1
                if (np.where(vid_segments[:-1] != vid_segments[1:])[0]).size != 0:
                    ids[1:-1] = (np.where(vid_segments[:-1] != vid_segments[1:])[0]) + 1

                # for i in tqdm(range(len(set(vid_segments)))):
                #     features = extract_features(model, np.array(vid_used_frames)[int(ids[i]):int(ids[i+1])],
                #                                 vid_used_bbox[int(ids[i]):int((ids[i+1]))],
                #                                 kp_2d=np.array(vid_used_joints)[int(ids[i]):int(ids[i+1])],
                #                                 dataset='spin', debug=False)
                #     dataset['features'].append(features)

    for k in dataset.keys():
        dataset[k] = np.array(dataset[k])
    # dataset['features'] = np.concatenate(dataset['features'])

    return dataset
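# Illustration (not part of the pipeline): how the segment-boundary array `ids`
# computed above behaves. `vid_segments` holds one "_segN" id per kept frame;
# each position where consecutive ids differ starts a new contiguous chunk, and
# those chunks are what the commented-out per-segment feature extraction would
# iterate over. The helper below is made up for this sketch.
def _segment_boundaries_example():
    import numpy as np

    vid_segments = np.array(['seg0', 'seg0', 'seg1', 'seg1', 'seg1'])

    ids = np.zeros(len(set(vid_segments)) + 1)
    ids[-1] = len(vid_segments) + 1
    change_points = np.where(vid_segments[:-1] != vid_segments[1:])[0]
    if change_points.size != 0:
        ids[1:-1] = change_points + 1

    # ids == [0., 2., 6.]: segment 0 is frames [0:2], segment 1 is frames [2:6]
    # (the final sentinel is one past the end; NumPy slicing tolerates that)
    return ids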
def read_data(folder, set):
    """Collect per-person tracklets (image paths, 2D joints, bounding boxes and
    SPIN image features) from the PoseTrack annotations of the given split
    (`set` is 'train' or 'val')."""
    dataset = {
        'img_name': [],
        'joints2D': [],
        'bbox': [],
        'vid_name': [],
        'features': [],
    }

    # pretrained SPIN/HMR backbone used by extract_features below
    model = spin.get_pretrained_hmr()

    file_names = glob.glob(
        osp.join(folder, 'posetrack_data/annotations/', f'{set}/*.json'))
    file_names = sorted(file_names)
    nn_corrupted = 0
    tot_frames = 0
    min_frame_number = 8

    for fid, fname in tqdm_enumerate(file_names):
        # skip a known problematic annotation file
        if fname == osp.join(folder, 'posetrack_data/annotations/train/021133_mpii_train.json'):
            continue

        with open(fname, 'r') as entry:
            anns = json.load(entry)
        # num_frames = anns['images'][0]['nframes']
        anns['images'] = [item for item in anns['images'] if item['is_labeled']]
        num_frames = len(anns['images'])
        frame2imgname = dict()
        for el in anns['images']:
            frame2imgname[el['frame_id']] = el['file_name']

        # track ids are contiguous, so the largest one gives the person count
        num_people = -1
        for x in anns['annotations']:
            if num_people < x['track_id']:
                num_people = x['track_id']
        num_people += 1

        # reorder keypoints to the canonical PoseTrack joint order
        posetrack_joints = get_posetrack_original_kp_names()
        idxs = [
            anns['categories'][0]['keypoints'].index(h) for h in posetrack_joints
            if h in anns['categories'][0]['keypoints']
        ]
        for x in anns['annotations']:
            kps = np.array(x['keypoints']).reshape((17, 3))
            kps = kps[idxs, :]
            x['keypoints'] = list(kps.flatten())

        tot_frames += num_people * num_frames
        for p_id in range(num_people):

            annot_pid = [(item['keypoints'], item['bbox'], item['image_id'])
                         for item in anns['annotations']
                         if item['track_id'] == p_id and not (np.count_nonzero(item['keypoints']) == 0)]

            if len(annot_pid) < min_frame_number:
                nn_corrupted += len(annot_pid)
                continue

            bbox = np.zeros((len(annot_pid), 4))
            # perm_idxs = get_perm_idxs('posetrack', 'common')
            kp_2d = np.zeros((len(annot_pid), len(annot_pid[0][0]) // 3, 3))
            img_paths = np.zeros((len(annot_pid)))

            for i, (key2djnts, bbox_p, image_id) in enumerate(annot_pid):

                if (bbox_p[2] == 0 or bbox_p[3] == 0):
                    nn_corrupted += 1
                    continue

                img_paths[i] = image_id
                # mark every annotated keypoint as visible ...
                key2djnts[2::3] = len(key2djnts[2::3]) * [1]

                kp_2d[i, :] = np.array(key2djnts).reshape(
                    int(len(key2djnts) / 3), 3)  # [perm_idxs, :]
                # ... except those that were never annotated (x == y == 0)
                for kp_loc in kp_2d[i, :]:
                    if kp_loc[0] == 0 and kp_loc[1] == 0:
                        kp_loc[2] = 0

                # convert (x_tl, y_tl, w, h) to a square, center-based bbox
                x_tl = bbox_p[0]
                y_tl = bbox_p[1]
                w = bbox_p[2]
                h = bbox_p[3]
                bbox_p[0] = x_tl + w / 2
                bbox_p[1] = y_tl + h / 2
                # w = h = np.where(w / h > 1, w, h)
                w = h = h * 0.8
                bbox_p[2] = w
                bbox_p[3] = h
                bbox[i, :] = bbox_p

            img_paths = list(img_paths)
            img_paths = [
                osp.join(folder, frame2imgname[item]) if item != 0 else 0
                for item in img_paths
            ]

            # drop frames whose bbox was left empty (corrupted annotations)
            bbx_idxs = []
            for bbx_id, bbx in enumerate(bbox):
                if np.count_nonzero(bbx) == 0:
                    bbx_idxs += [bbx_id]
            kp_2d = np.delete(kp_2d, bbx_idxs, 0)
            img_paths = np.delete(np.array(img_paths), bbx_idxs, 0)
            bbox = np.delete(bbox, np.where(~bbox.any(axis=1))[0], axis=0)

            # Convert to common 2d keypoint format
            if bbox.size == 0 or bbox.shape[0] < min_frame_number:
                nn_corrupted += 1
                continue

            kp_2d = convert_kps(kp_2d, src='posetrack', dst='spin')

            dataset['vid_name'].append(np.array([f'{fname}_{p_id}'] * img_paths.shape[0]))
            dataset['img_name'].append(np.array(img_paths))
            dataset['joints2D'].append(kp_2d)
            dataset['bbox'].append(np.array(bbox))

            # compute_features
            features = extract_features(
                model,
                np.array(img_paths),
                bbox,
                kp_2d=kp_2d,
                dataset='spin',
                debug=False,
            )
            assert kp_2d.shape[0] == img_paths.shape[0] == bbox.shape[0]
            dataset['features'].append(features)

    print(nn_corrupted, tot_frames)

    # stack the per-tracklet lists into flat arrays
    for k in dataset.keys():
        dataset[k] = np.array(dataset[k])
    for k in dataset.keys():
        dataset[k] = np.concatenate(dataset[k])
    for k, v in dataset.items():
        print(k, v.shape)

    return dataset
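# A minimal driver sketch showing how the two readers above could be invoked and
# their outputs serialized. It assumes joblib is available and writes to a local
# 'db/' directory by default; the output directory, file names, and the choice
# of joblib (rather than e.g. np.savez) are assumptions made for this example,
# not requirements of the functions above.
if __name__ == '__main__':
    import argparse
    import joblib

    parser = argparse.ArgumentParser()
    parser.add_argument('--mpii3d_dir', type=str, default=None,
                        help='root of the MPI-INF-3DHP dataset (S*/Seq*/...)')
    parser.add_argument('--posetrack_dir', type=str, default=None,
                        help='root of the PoseTrack dataset (posetrack_data/...)')
    parser.add_argument('--out_dir', type=str, default='db',
                        help='where to write the preprocessed databases')
    args = parser.parse_args()

    os.makedirs(args.out_dir, exist_ok=True)

    if args.mpii3d_dir is not None:
        db = read_data_train(args.mpii3d_dir)
        joblib.dump(db, osp.join(args.out_dir, 'mpii3d_train_db.pt'))

    if args.posetrack_dir is not None:
        db = read_data(args.posetrack_dir, 'train')
        joblib.dump(db, osp.join(args.out_dir, 'posetrack_train_db.pt'))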