def default_drawer(gts, preds, valid, ax):
    # xroot, zroot, bottom and RADIUS are expected to be defined in the enclosing scope.
    show3Dpose(gts, MuPoTSJoints(), ax=ax, invert_vertical=True, show_numbers=False,
               lcolor="#911f1f", rcolor="#874924", ccolor="#1b4882")
    for p in preds[valid]:
        add3Dpose(p, ax, MuPoTSJoints())
    ax.set_xlim3d([-RADIUS - 400 + xroot, RADIUS + xroot + 600])
    ax.set_ylim3d([-RADIUS + zroot - 200, RADIUS + zroot + 100])
    ax.set_zlim3d([bottom + 10, bottom - 2500])
def train_ground_truth(sub, seq, fix_incorrect=True):
    """
    Returns the ground truth annotations as a dict with fields 'annot2', 'annot3', 'univ_annot3'.

    :param fix_incorrect: S4/Seq2 has annotations flipped on some frames; if True they are flipped back
    """
    annot = load(os.path.join(MPII_3DHP_PATH, "S%d" % sub, "Seq%d" % seq, "annot.mat"))

    annot2 = list([x[0].reshape((-1, 28, 2))[:, MUPOTS_RELEVANT_JOINTS].astype("float32")
                   for x in annot["annot2"]])
    annot3 = list([x[0].reshape((-1, 28, 3))[:, MUPOTS_RELEVANT_JOINTS].astype("float32")
                   for x in annot["annot3"]])
    univ_annot3 = list([x[0].reshape((-1, 28, 3))[:, MUPOTS_RELEVANT_JOINTS].astype("float32")
                        for x in annot["univ_annot3"]])

    assert np.all(annot["cameras"][0] == np.arange(14))
    assert np.all(annot["frames"][:, 0] == np.arange(len(annot2[0])))

    # S3/Seq1 has one extra annotation but one less frame,
    # so remove the very last annotation from everywhere
    if sub == 3 and seq == 1:
        for cam in range(14):
            annot2[cam] = annot2[cam][:-1]
            annot3[cam] = annot3[cam][:-1]
            univ_annot3[cam] = univ_annot3[cam][:-1]

    if sub == 4 and seq == 2 and fix_incorrect:
        # between frame 3759 (inclusive) and 5853 (exclusive) the annotations are flipped
        for cam in range(14):
            annot2[cam][3759:5853] = MuPoTSJoints().flip(annot2[cam][3759:5853])
            annot3[cam][3759:5853] = MuPoTSJoints().flip(annot3[cam][3759:5853])
            univ_annot3[cam][3759:5853] = MuPoTSJoints().flip(univ_annot3[cam][3759:5853])

    N = len(annot2[0])
    for cam in range(14):
        assert len(annot2[cam]) == N
        assert len(annot3[cam]) == N
        assert len(univ_annot3[cam]) == N

    result = {"annot2": annot2, "annot3": annot3, "univ_annot3": univ_annot3}
    return result
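# Hedged usage sketch (not from the original repo): requires the MPI-INF-3DHP training data
# under MPII_3DHP_PATH; the subject/sequence numbers below are arbitrary examples.
if __name__ == '__main__':
    gt = train_ground_truth(4, 2, fix_incorrect=True)
    print(len(gt['annot3']))       # 14, one entry per camera
    print(gt['annot3'][0].shape)   # (nFrames, len(MUPOTS_RELEVANT_JOINTS), 3)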
def main(img_folder, metadata, poses_path, depth_folder, out_path, visualize):
    config, model = load_model('unnormalized')

    test_set = ImageFolderDataset(img_folder, metadata, poses_path, depth_folder)
    transforms = load_transforms('unnormalized', config, test_set) + [lambda x: x['pose2d']]
    test_set.transform = Compose(transforms)
    test_loader = DataLoader(test_set)

    pred = torch_predict(model, test_loader)

    mean3d = transforms[1].normalizer.mean
    std3d = transforms[1].normalizer.std
    pred = combine_pose_and_trans(pred, std3d, mean3d, MuPoTSJoints(), 'hip')

    result = {}
    for image in test_set.images:
        inds = test_set.img_names == image
        result[image] = pred[inds]
    save(out_path, result)

    if visualize:
        image = test_set.images[0]
        image_path = os.path.join(img_folder, image)
        show_result(image_path, result[image])
def augment(self, scale_by_dist, scales=None):
    """
    Augments the data in a pose dataset by simulating moving the poses closer to and further
    away from the camera. The original data is kept and the transformed copies are concatenated
    after it, so the dataset doubles in size.

    :param scale_by_dist: if True, values are scaled by the L2 distance from the camera during
                          augmentation, otherwise by the z coordinate (depth)
    :param scales: if defined, values in this array are used for scaling instead of random values
    """
    assert isinstance(self.pose3d_jointset, MuPoTSJoints), "only implemented for MuPoTS joints"
    orig_size = len(self.poses2d)
    root_ind = MuPoTSJoints().index_of('hip')

    # Calculating minimum scale to avoid joints behind camera
    if scales is None:
        limb_vec = self.poses3d[:, :, 2] - self.poses3d[:, [root_ind], 2]
        min_scale = np.nanmax(-limb_vec / self.poses3d[:, [root_ind], 2], axis=1)

        scales = np.random.normal(1, 0.25, orig_size)
        scales[scales < 0.6] = 1
        scales = np.maximum(scales, min_scale + 1e-5)
        scales[scales > 1.5] = 1
        scales = scales.reshape((-1, 1))
    else:
        assert scales.ndim == 2, "scales is expected to be a column vector"
    self.scales = scales.copy()

    # Duplicate all the training data: the first half is the original, unchanged,
    # the second half is augmented
    for field in ['poses2d', 'poses3d', 'fx', 'fy', 'cx', 'cy', 'width', 'valid_2d_pred']:
        if hasattr(self, field):
            data = self.__getattribute__(field)
            self.__setattr__(field, np.concatenate([data, data.copy()]))
    if hasattr(self, 'index'):
        self.index = np.concatenate([self.index, self.index.copy()])

    # Calculate the new 3D coordinates of the poses
    orig_roots = np.expand_dims(self.poses3d[orig_size:, root_ind, :].copy(), 1)  # (nPoses, 1, 3)
    new_roots = orig_roots * np.expand_dims(scales, 1)
    self.poses3d[orig_size:, :, :] = self.poses3d[orig_size:, :, :] - orig_roots + new_roots

    pose2d_root_ind = self.pose2d_jointset.index_of('hip')
    self.poses2d[orig_size:, :, :2] = (self.poses2d[orig_size:, :, :2]
                                       - self.poses2d[orig_size:, [pose2d_root_ind], :2]) / scales[:, :, None] \
                                      + self.poses2d[orig_size:, [pose2d_root_ind], :2]

    assert np.all((self.poses3d[:, :, 2] >= 0) | np.isnan(self.poses3d[:, :, 2])), "Joint behind camera"
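# Illustrative, self-contained sketch of the augmentation geometry above (hypothetical values,
# not part of the dataset class): scaling the root by s pushes the whole pose along the camera
# ray while root-relative 3D offsets stay fixed, and the 2D keypoints contract by 1/s around
# the projected hip, matching the poses2d update.
import numpy as np

s = 1.2
root3d = np.array([100., 200., 3000.])       # hip position in mm (made up)
joint3d = np.array([150., 180., 3050.])      # another joint of the same pose
new_root3d = root3d * s
new_joint3d = joint3d - root3d + new_root3d  # root-relative offset is unchanged

hip2d = np.array([640., 360.])
joint2d = np.array([660., 350.])
new_joint2d = (joint2d - hip2d) / s + hip2d  # 2D spread scales with 1/s
print(new_joint3d, new_joint2d)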
def generate_vid_frames(cam, vid_id):
    print(cam, vid_id)
    metas = sequence_metas[cam][vid_id]
    steps = [2 if mpii_3dhp.get_train_fps(meta[0], meta[1]) == 50 else 1 for meta in metas]

    out_folder = os.path.join(muco_temp.MUCO_TEMP_PATH, 'frames/cam_%d/vid_%d' % (cam, vid_id))
    ensuredir(out_folder)

    gt_poses = load(os.path.join(muco_temp.MUCO_TEMP_PATH, 'frames/cam_%d/gt.pkl' % cam))[vid_id]['annot3']
    hip_ind = MuPoTSJoints().index_of('hip')

    for i in range(NUM_FRAMES):
        # generate frame
        depths = gt_poses[i, :, hip_ind, 2]
        ordered_poses = np.argsort(depths)[::-1]  # poses ordered by depth in decreasing order

        bg_ind = ordered_poses[0]
        img = mpii_3dhp.get_image(metas[bg_ind][0], metas[bg_ind][1], cam,
                                  metas[bg_ind][2] + i * steps[bg_ind], rgb=False)
        img = img.astype('float32')

        # add new pose onto image
        for pose_ind in ordered_poses[1:]:
            sub, seq, start = metas[pose_ind]
            pose_img = mpii_3dhp.get_image(sub, seq, cam, start + i * steps[pose_ind], rgb=False)

            # mask is 0 at greenscreen bg, 1 at foreground (body, chair)
            mask = mpii_3dhp.get_mask(sub, seq, cam, start + i * steps[pose_ind], 'FGmasks')[:, :, 2] / 255.
            mask = cv2.GaussianBlur(mask, (0, 0), 2)[:, :, np.newaxis]

            # chair_mask is 0 at chair, 1 everywhere else
            chair_mask = mpii_3dhp.get_mask(sub, seq, cam, start + i * steps[pose_ind], 'ChairMasks')[:, :, [2]] / 255

            img = chair_mask * img + (1 - chair_mask) * pose_img
            img = mask * pose_img + (1 - mask) * img

        img = img.astype('uint8')
        cv2.imwrite(os.path.join(out_folder, 'img_%04d.jpg' % i), img, [cv2.IMWRITE_JPEG_QUALITY, 80])
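# Minimal, self-contained sketch of the two-step compositing in the loop above, on synthetic
# arrays: the chair mask first restores the background where the new frame contains a chair,
# then the (soft) foreground mask pastes the person on top.
import numpy as np

img = np.full((4, 4, 3), 200, dtype='float32')       # current composite frame
pose_img = np.full((4, 4, 3), 50, dtype='float32')   # frame containing the person to paste in
mask = np.zeros((4, 4, 1), dtype='float32')          # 1 at the person, 0 at the greenscreen
mask[1:3, 1:3] = 1.0
chair_mask = np.ones((4, 4, 1), dtype='float32')     # 0 at the chair, 1 everywhere else

img = chair_mask * img + (1 - chair_mask) * pose_img
img = mask * pose_img + (1 - mask) * img
print(img[:, :, 0])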
def __init__(self, img_folder, metadata, poses_path, depth_folder):
    self.transform = None
    self.images = sorted(os.listdir(img_folder))

    # Load camera parameters
    with open(metadata, 'r') as f:
        data = f.readlines()
    data = [x.split(',') for x in data]
    data = [[y.strip() for y in x] for x in data]
    camera_params = {x[0]: [float(y) for y in x[1:]] for x in data[1:]}

    # Prepare data
    poses2d = []
    fx = []
    fy = []
    cx = []
    cy = []
    img_names = []
    jointwise_depth = []

    pred2d = load(poses_path)
    for image in self.images:
        poses = [np.array(x['keypoints']).reshape((17, 3)) for x in pred2d[image]]
        poses = np.stack(poses, axis=0)  # (nPoses, 17, 3)
        poses = extend_hrnet_raw(poses)  # (nPoses, 19, 3)

        img = cv2.imread(os.path.join(img_folder, image))
        width, height = recommended_size(img.shape)

        depth = load(os.path.join(depth_folder, image + '.npy'))
        depth = depth_from_coords(depth, poses.reshape((1, -1, 3))[:, :, :2],
                                  width, height)  # (nFrames(=1), nPoses*19)
        depth = depth.reshape((-1, 19))            # (nPoses, 19)
        jointwise_depth.append(depth)

        poses2d.append(poses)
        for i, field in enumerate([fx, fy, cx, cy]):
            field.extend([camera_params[image][i]] * len(poses))
        img_names.extend([image] * len(poses))

    self.poses2d = np.concatenate(poses2d).astype('float32')
    self.poses3d = np.ones_like(self.poses2d)[:, :17]
    self.fx = np.array(fx, dtype='float32')
    self.fy = np.array(fy, dtype='float32')
    self.cx = np.array(cx, dtype='float32')
    self.cy = np.array(cy, dtype='float32')
    self.img_names = np.array(img_names)
    self.pred_cdepths = np.concatenate(jointwise_depth).astype('float32')

    self.pose2d_jointset = CocoExJoints()
    self.pose3d_jointset = MuPoTSJoints()
def eval_poses(is_relative, pose3d_type, preds_3d_kpt):
    """
    Calculates the PCK and AUC. This function is equivalent to ``mpii_test_predictions.m``.

    :param is_relative: True if relative (hip-subtracted) error is calculated
    :param pose3d_type: 'annot3' or 'univ_annot3'
    :param preds_3d_kpt: seq->ndarray(nFrames,17,3), in MuPo-TS joint order. 3D pose predictions.
    :return: two dicts mapping sequence id to PCK and AUC
    """
    # Joints used in original evaluation script
    joint_groups = [
        ["Head", [0]],
        ["Neck", [1]],
        ["Shou", [2, 5]],
        ["Elbow", [3, 6]],
        ["Wrist", [4, 7]],
        ["Hip", [8, 11]],
        ["Knee", [9, 12]],
        ["Ankle", [10, 13]],
    ]
    scored_joints = np.concatenate([x[1] for x in joint_groups])  # Those joints that take part in scoring

    pck_by_sequence = {}
    auc_by_sequence = {}
    for seq in range(1, 7):
        gt = test_ground_truth(seq)
        gt3d = gt[pose3d_type][gt["valid_frame"]]
        pred3d = preds_3d_kpt[seq][gt["valid_frame"]]  # (nFrames, nJoints, 3)

        if is_relative:
            hip_ind = MuPoTSJoints().index_of("hip")
            gt3d -= gt3d[:, [hip_ind]]
            pred3d -= pred3d[:, [hip_ind]]

        jointwise_err = np.linalg.norm(gt3d - pred3d, axis=-1)  # (nFrames, nJoints)
        pck_by_sequence[seq] = np.mean(jointwise_err[:, scored_joints] < PCK_THRESHOLD) * 100
        auc_by_sequence[seq] = np.mean(
            [np.mean(jointwise_err[:, scored_joints] < t) for t in AUC_THRESHOLDS]) * 100

    return pck_by_sequence, auc_by_sequence
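# Hedged sketch of the PCK/AUC computation on synthetic per-joint errors. PCK_THRESHOLD and
# AUC_THRESHOLDS are module-level constants in the original code; the 150 mm threshold and the
# 0-150 mm AUC range used here are the usual MPI-INF-3DHP values and are an assumption.
import numpy as np

jointwise_err = np.random.uniform(0, 300, size=(500, 14))  # (nFrames, nScoredJoints), in mm
pck_threshold = 150
auc_thresholds = np.linspace(0, 150, 31)
pck = np.mean(jointwise_err < pck_threshold) * 100
auc = np.mean([np.mean(jointwise_err < t) for t in auc_thresholds]) * 100
print('PCK %.1f  AUC %.1f' % (pck, auc))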
def show_result(image_path, poses):
    assert_shape(poses, (None, MuPoTSJoints.NUM_JOINTS, 3))

    # import here so it's not needed for prediction
    import matplotlib.pyplot as plt
    from util import viz

    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2 loads BGR, matplotlib expects RGB

    plt.figure(figsize=(9, 4.5))
    plt.subplot(1, 2, 1)
    plt.imshow(img)

    ax = viz.subplot(1, 2, 2)
    viz.show3Dpose(poses, MuPoTSJoints(), ax, invert_vertical=True)
    plt.show()
def __init__(self, frame_folder, hrnet_keypoint_file, fx, fy, cx=None, cy=None):
    self.transform = None
    self.pose2d_jointset = CocoExJoints()
    self.pose3d_jointset = MuPoTSJoints()

    frame_list = sorted(os.listdir(frame_folder))
    N = len(frame_list)

    hrnet_detections = load(hrnet_keypoint_file)
    self.poses2d, self.valid_2d_pred = stack_hrnet_raw(frame_list, hrnet_detections)
    assert len(self.poses2d) == N, "unexpected number of frames"

    index = [('vid', i) for i in range(N)]
    self.index = np.rec.array(index, dtype=[('seq', 'U4'), ('frame', 'int32')])

    self.poses3d = np.ones((N, self.pose3d_jointset.NUM_JOINTS, 3))  # dummy values

    # load first frame to get width/height
    frame = cv2.imread(os.path.join(frame_folder, frame_list[0]))
    self.width = frame.shape[1]

    self.fx = np.full(N, fx, dtype='float32')
    self.fy = np.full(N, fy, dtype='float32')
    self.cx = np.full(N, cx if cx is not None else frame.shape[1] / 2, dtype='float32')
    self.cy = np.full(N, cy if cy is not None else frame.shape[0] / 2, dtype='float32')

    assert self.poses2d.shape[1] == self.pose2d_jointset.NUM_JOINTS
def eval_poses(matched_only, is_relative, pose3d_type, preds_2d_kpt, preds_3d_kpt, keep_matching=False):
    """
    Calculates the PCK and AUC. This function is equivalent to ``mpii_mupots_multiperson_eval.m``.
    It performs the same gt scaling transformation and uses the same joints for matching and evaluation.

    :param matched_only: True if only detected poses count towards the PCK and AUC
    :param is_relative: True if relative error is calculated
    :param pose3d_type: 'annot3' or 'univ_annot3'
    :param preds_2d_kpt: seq->list(ndarray(nPoses,17,2)), in MuPo-TS joint order. 2D pose predictions.
    :param preds_3d_kpt: seq->list(ndarray(nPoses,17,3)), in MuPo-TS joint order. 3D pose predictions.
    :param keep_matching: if True, the preds_2d_kpt arrays are assumed to be already matched with gt.
                          Otherwise, the matching algorithm in mpii_map_to_gt_bone_lengths is used.
    :return: two dicts from seq name to pck and auc
    """
    # Joints used in original evaluation script
    joints_for_matching = np.arange(1, 14)  # Joints used to match up the 2D poses
    joint_groups = [['Head', [0]], ['Neck', [1]], ['Shou', [2, 5]], ['Elbow', [3, 6]],
                    ['Wrist', [4, 7]], ['Hip', [8, 11]], ['Knee', [9, 12]], ['Ankle', [10, 13]]]
    scored_joints = np.concatenate([x[1] for x in joint_groups])  # Those joints that take part in scoring

    my_matching_inds = []
    all_perjoint_errors = {}
    pck_by_sequence = {}
    auc_by_sequence = {}
    for seq in range(1, 21):
        gt = load_gt_annotations(seq)
        num_frames = gt['annot2'].shape[0]

        gt_poses = []
        pred_poses = []
        valid_pred = []
        for i in range(num_frames):
            gt_pose_2d = gt['annot2'][i][gt['isValidFrame'][i]]
            gt_pose_3d = gt[pose3d_type][i][gt['isValidFrame'][i]]
            # gt_visibility = ~gt['occlusions'][i][gt['isValidFrame'][i]]
            gt_visibility = np.ones(gt_pose_2d.shape[:2], dtype='bool')

            pred_pose_2d = preds_2d_kpt[seq][i]
            pred_pose_3d = preds_3d_kpt[seq][i]
            pred_visibility = np.ones(pred_pose_2d.shape[:2], dtype='bool')

            # matching between 2D points
            if keep_matching:
                pair_inds = np.arange(gt['annot2'].shape[1])[gt['isValidFrame'][i]]
            else:
                pair_inds = _match_poses(gt_pose_2d[:, joints_for_matching],
                                         gt_visibility[:, joints_for_matching],
                                         pred_pose_2d[:, joints_for_matching],
                                         pred_visibility[:, joints_for_matching], 40)
            my_matching_inds.append(pair_inds)
            has_pair = pair_inds >= 0

            # Reorder predicted poses to match GT poses. If a GT pose does not have a pair, it is filled with 1e5
            reordered_pose_3d = 100000 * np.ones_like(gt_pose_3d)            # (nGtPoses, nJoints, 3)
            reordered_pose_3d[has_pair] = pred_pose_3d[pair_inds[has_pair]]  # (nGtPoses, nJoints, 3)

            gt_poses.append(gt_pose_3d)
            pred_poses.append(reordered_pose_3d)
            valid_pred.append(has_pair)

        gt_poses = np.concatenate(gt_poses)
        pred_poses = np.concatenate(pred_poses)
        valid_pred = np.concatenate(valid_pred)

        if is_relative:
            hip_ind = MuPoTSJoints().index_of('hip')
            gt_poses -= gt_poses[:, [hip_ind]]
            pred_poses -= pred_poses[:, [hip_ind]]

        # calculating per joint errors
        pred_poses = _scale_to_gt(pred_poses, gt_poses)
        pred_poses[~valid_pred] = 100000
        errors = np.linalg.norm(gt_poses - pred_poses, axis=2)  # (nGtPoses, nJoints)

        if matched_only:
            errors = errors[valid_pred]

        pck_by_sequence[seq] = np.mean(errors[:, scored_joints] < 150) * 100
        auc_by_sequence[seq] = np.mean([np.mean(errors[:, scored_joints] < t) for t in AUC_THRESHOLDS]) * 100
        all_perjoint_errors[seq] = errors

    return pck_by_sequence, auc_by_sequence
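# Small self-contained illustration of the reordering step above: predictions are rearranged
# into GT order via pair_inds, and GT poses without a matched prediction are filled with 1e5
# so they count as misses at every threshold. All values below are synthetic.
import numpy as np

gt_pose_3d = np.random.rand(3, 17, 3) * 100     # 3 GT poses in a frame
pred_pose_3d = np.random.rand(2, 17, 3) * 100   # only 2 detections
pair_inds = np.array([1, -1, 0])                # GT 0 -> pred 1, GT 1 unmatched, GT 2 -> pred 0
has_pair = pair_inds >= 0

reordered_pose_3d = 100000 * np.ones_like(gt_pose_3d)
reordered_pose_3d[has_pair] = pred_pose_3d[pair_inds[has_pair]]
errors = np.linalg.norm(gt_pose_3d - reordered_pose_3d, axis=2)  # (nGtPoses, nJoints)
print(np.mean(errors < 150) * 100)  # PCK@150mm; the unmatched GT pose contributes 0% hits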
def post_process_func(x):
    return combine_pose_and_trans(x, std3d, mean3d, MuPoTSJoints(), 'hip')
def optimize_poses(pred3d, data, _config, **kwargs):
    """
    Runs the optimisation (refinement) process on the given predictions.

    Parameters:
        pred3d: poses predicted by VideoPose, aligned with the dataset
        data: the dataset the predictions were made on
        _config: dictionary of additional parameters
    """
    _config = dict(_config)
    _config.update(kwargs)
    joint_set = MuPoTSJoints()

    seqs = np.unique(data.index.seq)
    if isinstance(pred3d, torch.Tensor):
        smoothed_pred = np.zeros(pred3d.shape)
    else:
        smoothed_pred = np.zeros_like(pred3d)  # (20899, 17, 3)

    losses = []

    for seq in seqs:
        inds = data.index.seq == seq  # (20899,)

        poses_init = abs_to_hiprel(pred3d[inds].copy(), joint_set).astype('float32') / 1000  # (201, 17, 3)

        # interpolate invisible poses, if required
        visible_poses = data.good_poses[inds]  # (201,)
        poses_pred = poses_init.copy()  # (201, 17, 3)

        kp_score = np.mean(data.poses2d[inds, :, 2], axis=-1)  # (201,)
        if _config['smooth_visibility']:
            kp_score = ndimage.median_filter(kp_score, 9)
        kp_score = torch.from_numpy(kp_score).cuda()      # [201]
        poses_init = torch.from_numpy(poses_init).cuda()  # [201, 17, 3]
        poses_pred = torch.from_numpy(poses_pred).cuda()  # [201, 17, 3]
        scale = torch.ones((len(kp_score), 1, 1))          # torch.Size([201, 1, 1])

        poses_init.requires_grad = False
        poses_pred.requires_grad = True  # TODO set to False
        kp_score.requires_grad = False
        scale.requires_grad = False

        optimizer = get_optimizer([poses_pred], _config)

        for i in range(_config['num_iter']):
            # smoothing formulation
            if _config['pose_loss'] == 'gm':
                pose_loss = torch.sum(kp_score.view(-1, 1, 1)
                                      * gmloss(poses_pred - poses_init, _config['gm_alpha']))
            elif _config['pose_loss'] == 'capped_l2':
                pose_loss = torch.sum(kp_score.view(-1, 1, 1)
                                      * capped_l2(poses_pred - poses_init,
                                                  torch.tensor(_config['l2_cap']).float().cuda()))
            elif _config['pose_loss'] == 'capped_l2_euc_err':
                pose_loss = torch.sum(kp_score.view(-1, 1)
                                      * capped_l2_euc_err(poses_pred, poses_init,
                                                          torch.tensor(_config['l2_cap']).float().cuda()))
            else:
                raise NotImplementedError('Unknown pose_loss' + _config['pose_loss'])

            velocity_loss_hip = torch.sum(globals()[_config['smoothness_loss_hip']](poses_pred[:, [0], :], 1))

            step = _config['smoothness_loss_hip_largestep']
            vel_loss = globals()[_config['smoothness_loss_hip']](poses_pred[:, [0], :], step)
            velocity_loss_hip_large = torch.sum((1 - kp_score[-len(vel_loss):]) * vel_loss)

            velocity_loss_rel = torch.sum(globals()[_config['smoothness_loss_rel']](poses_pred[:, 1:, :], 1))
            vel_loss = globals()[_config['smoothness_loss_rel']](poses_pred[:, 1:, :], step)
            velocity_loss_rel_large = torch.sum((1 - kp_score[-len(vel_loss):]) * vel_loss)

            total_loss = pose_loss + _config['smoothness_weight_hip'] * velocity_loss_hip \
                         + _config['smoothness_weight_hip_large'] * velocity_loss_hip_large \
                         + _config['smoothness_weight_rel'] * velocity_loss_rel \
                         + _config['smoothness_weight_rel_large'] * velocity_loss_rel_large

            # np.savez("pose_ref.npz",
            #          total_loss=total_loss.detach().cpu(),
            #          pose_loss=pose_loss.detach().cpu(),
            #          velocity_loss_hip=velocity_loss_hip.detach().cpu(),
            #          velocity_loss_hip_large=velocity_loss_hip_large.detach().cpu(),
            #          velocity_loss_rel=velocity_loss_rel.detach().cpu(),
            #          velocity_loss_rel_large=velocity_loss_rel_large.detach().cpu(),
            #          )
            # exit()

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        poses_init = poses_init.detach().cpu().numpy() * 1000
        poses_pred = poses_pred.detach().cpu().numpy() * 1000

        poses_init = add_back_hip(poses_init, joint_set)
        poses_pred = add_back_hip(poses_pred, joint_set)

        smoothed_pred[inds] = poses_pred
        losses.append(total_loss.item())

    if _config.get('print_loss', False):
        print('Avg loss:', np.mean(losses))

    return smoothed_pred
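# Illustrative sketch only: gmloss and capped_l2 used in optimize_poses are defined elsewhere
# in the repo. The forms below are the textbook Geman-McClure and capped-L2 penalties and may
# differ in detail from the repo's actual implementations.
import torch

def gm_penalty(residual, alpha):
    # Geman-McClure robust penalty: behaves like L2 for small residuals, saturates for outliers
    sq = residual ** 2
    return sq / (sq + alpha ** 2)

def capped_l2_penalty(residual, cap):
    # squared error clipped at `cap`, so large residuals do not dominate the objective
    return torch.clamp(residual ** 2, max=cap)

residual = torch.randn(201, 17, 3) * 0.05  # hip-relative residuals in metres (synthetic)
print(gm_penalty(residual, 1.0).sum().item(), capped_l2_penalty(residual, 0.1).sum().item())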
def __init__(self, pose2d_type, pose3d_scaling):
    """
    Loads the MuPoTS dataset, but only those images where at least one person was detected.
    Each person on a frame is loaded separately.
    """
    assert pose3d_scaling in ['univ', 'normal']

    self.pose2d_jointset = FilteredSinglePersonMuPoTsDataset.get_jointset(pose2d_type)
    self.pose3d_jointset = MuPoTSJoints()

    poses2d = []
    poses3d = []
    pred_cdepths = []
    index = []
    for seq in range(1, 21):
        depth_width = 512
        depth_height = 512 if seq <= 5 else 288

        gt = mupots_3d.load_gt_annotations(seq)
        op = mupots_3d.load_2d_predictions(seq, pose2d_type)

        pose2d = op['pose']
        pose3d = gt['annot3' if pose3d_scaling == 'normal' else 'univ_annot3']
        depth = mupots_3d.load_jointwise_depth(seq)

        good_poses = gt['isValidFrame'].squeeze()
        good_poses = np.logical_and(good_poses, op['valid_pose'])

        orig_frame = np.tile(np.arange(len(good_poses)).reshape((-1, 1)), (1, good_poses.shape[1]))
        orig_pose = np.tile(np.arange(good_poses.shape[1]).reshape((1, -1)), (good_poses.shape[0], 1))

        assert pose2d.shape[:2] == good_poses.shape  # (nFrames, nPeople)
        assert pose3d.shape[:2] == good_poses.shape
        assert depth.shape[:2] == good_poses.shape
        assert orig_frame.shape == good_poses.shape
        assert orig_pose.shape == good_poses.shape
        assert pose2d.shape[2:] == (self.pose2d_jointset.NUM_JOINTS, 3)
        assert pose3d.shape[2:] == (17, 3)
        assert good_poses.ndim == 2

        # Keep only those poses where good_poses is True
        pose2d = pose2d[good_poses]
        pose3d = pose3d[good_poses]
        orig_frame = orig_frame[good_poses]
        orig_pose = orig_pose[good_poses]
        depth = depth[good_poses]

        index.extend([(seq, orig_frame[i], orig_pose[i], depth_width, depth_height)
                      for i in range(len(orig_frame))])

        assert len(pose2d) == len(pose3d)
        poses2d.append(pose2d)
        poses3d.append(pose3d)
        pred_cdepths.append(depth)

    self.poses2d = np.concatenate(poses2d).astype('float32')
    self.poses3d = np.concatenate(poses3d).astype('float32')
    self.pred_cdepths = np.concatenate(pred_cdepths).astype('float32')
    self.index = np.rec.array(index, dtype=[('seq', 'int32'), ('frame', 'int32'), ('pose', 'int32'),
                                            ('depth_width', 'int32'), ('depth_height', 'int32')])

    # Load calibration matrices
    N = len(self.poses2d)
    self.fx = np.zeros(N, dtype='float32')
    self.fy = np.zeros(N, dtype='float32')
    self.cx = np.zeros(N, dtype='float32')
    self.cy = np.zeros(N, dtype='float32')

    mupots_calibs = mupots_3d.get_calibration_matrices()
    for seq in range(1, 21):
        inds = (self.index.seq == seq)
        self.fx[inds] = mupots_calibs[seq][0, 0]
        self.fy[inds] = mupots_calibs[seq][1, 1]
        self.cx[inds] = mupots_calibs[seq][0, 2]
        self.cy[inds] = mupots_calibs[seq][1, 2]

    assert np.all(self.fx > 0), "Some fields were not filled"
    assert np.all(self.fy > 0), "Some fields were not filled"
    assert np.all(np.abs(self.cx) > 0), "Some fields were not filled"
    assert np.all(np.abs(self.cy) > 0), "Some fields were not filled"

    self.transform = None
def run(**kwargs):
    refine_config = load("scripts/nn_refine_config.json")
    for k, v in kwargs.items():
        refine_config[k] = v

    exp = Experiment(
        workspace="pose-refinement",
        project_name="08-nn-ref-bone-length",
        display_summary_level=0,
    )
    exp.log_parameters(refine_config)

    model_name = refine_config["model_name"]
    config, model = load_model(model_name)
    test_set = get_dataset(config)
    post_process_func = extract_post(model_name, test_set, config)
    joint_set = MuPoTSJoints()
    connected_joints = joint_set.LIMBGRAPH

    pad = (model.receptive_field() - 1) // 2
    generator = UnchunkedGeneratorWithGT(test_set, pad, True)
    seqs = sorted(np.unique(test_set.index.seq))

    optimized_preds_list = defaultdict(list)
    max_batch = len(generator)
    exp.log_parameter("max_batch", max_batch)
    for curr_batch, (pose2d, valid, pose3d) in enumerate(generator):
        exp.log_parameter("curr_batch", curr_batch)
        exp.log_parameter("curr_batch%", curr_batch / max_batch)

        if refine_config["full_batch"]:
            max_item = 1
        else:
            max_item = valid.shape[-1]

        for curr_item in range(max_item):
            if not refine_config["full_batch"]:
                exp.log_parameter("curr_item", curr_item)
                exp.log_parameter("curr_item%", curr_item / max_item)
                if (curr_item + 1) > (max_item - refine_config["smoothness_loss_hip_largestep"]):
                    reverse = True
                    f = curr_item - refine_config["smoothness_loss_hip_largestep"]
                    t = curr_item + 1
                else:
                    reverse = False
                    f = curr_item
                    t = f + refine_config["smoothness_loss_hip_largestep"] + 1

            model_ = copy.deepcopy(model)
            optimizer = get_optimizer(model_.parameters(), refine_config)

            max_iter = refine_config["num_iter"]
            for curr_iter in range(max_iter):
                exp.log_parameter("curr_iter", curr_iter)
                exp.log_parameter("curr_iter%", curr_iter / max_iter)
                optimizer.zero_grad()

                seq = seqs[curr_batch]
                if refine_config["full_batch"]:
                    nn_input = pose2d
                    valid_ = valid[0]
                else:
                    nn_input = pose2d[:, f:t + 2 * pad, :]
                    valid_ = valid[0][f:t]

                pred3d = model_(
                    torch.from_numpy(nn_input).cuda()
                )  # [2, 401, 42] -> [2, 21+2*13, 42], pred3d: [21, 16, 3]

                pred_real_pose = post_process_func(pred3d[0], seq)  # unnormalized output

                pred_real_pose_aug = post_process_func(pred3d[1], seq)
                pred_real_pose_aug[:, :, 0] *= -1
                pred_real_pose_aug = test_set.pose3d_jointset.flip(pred_real_pose_aug)
                pred_real_pose = (pred_real_pose + pred_real_pose_aug) / 2

                pred = pred_real_pose[valid_]
                gt_pose = post_process_func(pose3d[0], seq)

                inds = test_set.index.seq == seq
                poses_pred = abs_to_hiprel(pred, joint_set) / 1000  # (201, 17, 3)

                if refine_config["reinit"] or (curr_iter == 0):
                    poses_init = poses_pred.detach().clone()
                    poses_init.requires_grad = False

                    if not refine_config["full_batch"]:
                        kp_score = np.mean(test_set.poses2d[inds, :, 2], axis=-1)[f:t]  # (201,)
                    else:
                        kp_score = np.mean(test_set.poses2d[inds, :, 2], axis=-1)  # (201,)

                    # if refine_config['smooth_visibility']:
                    #     kp_score = ndimage.median_filter(kp_score, 9)
                    kp_score = torch.from_numpy(kp_score).cuda()  # [201]
                    scale = torch.ones((len(kp_score), 1, 1))  # torch.Size([201, 1, 1])

                    kp_score.requires_grad = False
                    scale.requires_grad = False

                # smoothing formulation
                if refine_config["pose_loss"] == "gm":
                    pose_loss = kp_score.view(-1, 1, 1) * gmloss(
                        poses_pred - poses_init, refine_config["gm_alpha"]
                    )
                elif refine_config["pose_loss"] == "capped_l2":
                    pose_loss = kp_score.view(-1, 1, 1) * capped_l2(
                        poses_pred - poses_init,
                        torch.tensor(refine_config["l2_cap"]).float().cuda(),
                    )
                elif refine_config["pose_loss"] == "capped_l2_euc_err":
                    pose_loss = kp_score.view(-1, 1) * capped_l2_euc_err(
                        poses_pred,
                        poses_init,
                        torch.tensor(refine_config["l2_cap"]).float().cuda(),
                    )
                else:
                    raise NotImplementedError(
                        "Unknown pose_loss" + refine_config["pose_loss"]
                    )

                velocity_loss_hip = globals()[refine_config["smoothness_loss_hip"]](
                    poses_pred[:, [0], :], 1
                )

                step = refine_config["smoothness_loss_hip_largestep"]
                vel_loss = globals()[refine_config["smoothness_loss_hip"]](
                    poses_pred[:, [0], :], step
                )
                velocity_loss_hip_large = (1 - kp_score[-len(vel_loss):]) * vel_loss

                velocity_loss_rel = globals()[refine_config["smoothness_loss_rel"]](
                    poses_pred[:, 1:, :], 1
                )
                vel_loss = globals()[refine_config["smoothness_loss_rel"]](
                    poses_pred[:, 1:, :], step
                )
                velocity_loss_rel_large = (1 - kp_score[-len(vel_loss):]) * vel_loss

                prefix = f"{curr_batch}_{curr_item}"
                if refine_config["full_batch"]:
                    total_loss = (
                        torch.sum(pose_loss)
                        + refine_config["smoothness_weight_hip"] * torch.sum(velocity_loss_hip)
                        + refine_config["smoothness_weight_hip_large"] * torch.sum(velocity_loss_hip_large)
                        + refine_config["smoothness_weight_rel"] * torch.sum(velocity_loss_rel)
                        + refine_config["smoothness_weight_rel_large"] * torch.sum(velocity_loss_rel_large)
                    )
                    m = {
                        f"{prefix}_total_loss": total_loss,
                        f"{prefix}_pose_loss": torch.sum(pose_loss),
                        f"{prefix}_velocity_loss_hip": torch.sum(velocity_loss_hip),
                        f"{prefix}_velocity_loss_hip_large": torch.sum(velocity_loss_hip_large),
                        f"{prefix}_velocity_loss_rel": torch.sum(velocity_loss_rel),
                        f"{prefix}_velocity_loss_rel_large": torch.sum(velocity_loss_rel_large),
                    }
                else:
                    neighbour_dist_idx = 0 if not reverse else -1
                    total_loss = (
                        torch.sum(pose_loss[neighbour_dist_idx,])
                        + refine_config["smoothness_weight_hip"] * velocity_loss_hip[[neighbour_dist_idx]]
                        + refine_config["smoothness_weight_hip_large"] * velocity_loss_hip_large
                        + refine_config["smoothness_weight_rel"] * velocity_loss_rel[[neighbour_dist_idx]]
                        + refine_config["smoothness_weight_rel_large"] * velocity_loss_rel_large
                    )
                    m = {
                        f"{prefix}_total_loss": total_loss[0],
                        f"{prefix}_pose_loss": torch.sum(pose_loss[neighbour_dist_idx,]),
                        f"{prefix}_velocity_loss_hip": velocity_loss_hip[neighbour_dist_idx],
                        f"{prefix}_velocity_loss_hip_large": velocity_loss_hip_large[0],
                        f"{prefix}_velocity_loss_rel": velocity_loss_rel[neighbour_dist_idx],
                        f"{prefix}_velocity_loss_rel_large": velocity_loss_rel_large[0],
                    }

                if refine_config["bone_weight"] != 0:
                    assert refine_config["full_batch"]
                    err = get_bone_lengths(pred, connected_joints)
                    bone_err = torch.mean(torch.std(err, dim=0)) * refine_config["bone_weight"]  # [cs]
                    total_loss += bone_err
                    m["bone_err"] = bone_err

                    gt_bones = get_bone_lengths(torch.from_numpy(gt_pose), connected_joints)
                    gt_bones = torch.mean(gt_bones, dim=0)
                    length_err = (
                        torch.nn.functional.mse_loss(err, gt_bones.cuda())
                        * refine_config["bone_length_weight"]
                    )
                    total_loss += length_err
                    m["bone_length_err"] = length_err

                total_loss.backward()
                optimizer.step()
                # print(m)
                # m = {k: v.detach().cpu().numpy() for k, v in m.items()}
                # exp.log_metrics(m, step=curr_iter)

            os.makedirs("nn_refs", exist_ok=True)
            np.save(f"nn_refs/{seq.replace('/', '_')}.npy", pred.cpu().detach().numpy())

            if refine_config["full_batch"]:
                optimized_preds_list[seq].append(
                    add_back_hip(poses_pred.detach().cpu().numpy() * 1000, joint_set)
                )
            else:
                optimized_preds_list[seq].append(
                    add_back_hip(
                        poses_pred[[neighbour_dist_idx]].detach().cpu().numpy() * 1000,
                        joint_set,
                    )
                )

    pred = {k: np.concatenate(v) for k, v in optimized_preds_list.items()}
    pred = TemporalMupotsEvaluator._group_by_seq(pred)
    pred = np.concatenate([pred[i] for i in range(1, 21)])

    l = StackedArrayAllMupotsEvaluator(pred, test_set, True, prefix="R")
    l.eval(calculate_scale_free=True, verbose=True)
    exp.log_metrics(l.losses_to_log)

    pred_by_seq = {}
    for seq in range(1, 21):
        inds = test_set.index.seq_num == seq
        pred_by_seq[seq] = pred[inds]
    pred_2d, pred_3d = unstack_mupots_poses(test_set, pred_by_seq)

    print("\nR-PCK R-AUC A-PCK A-AUC")
    keys = ["R-PCK", "R-AUC", "A-PCK", "A-AUC"]
    values = []
    for relative in [True, False]:
        pcks, aucs = mupots_3d.eval_poses(
            False,
            relative,
            "annot3" if config["pose3d_scaling"] == "normal" else "univ_annot3",
            pred_2d,
            pred_3d,
            keep_matching=True,
        )
        pck = np.mean(list(pcks.values()))
        auc = np.mean(list(aucs.values()))
        values.append(pck)
        values.append(auc)

        print(" %4.1f %4.1f " % (pck, auc), end="")
    print()
    exp.log_metrics({curr_iter: v for curr_iter, v in zip(keys, values)})
def __init__(self, pose2d_type, pose3d_scaling, v='v1'):
    assert pose2d_type == 'hrnet', "only hrnet is implemented"
    assert pose3d_scaling in ['univ', 'normal']

    self.transform = None
    self.pose2d_jointset = PersonStackedMuPoTsDataset.get_jointset(pose2d_type)
    self.pose3d_jointset = MuPoTSJoints()
    pose3d_key = 'annot3' if pose3d_scaling == 'normal' else 'univ_annot3'

    poses2d = []
    poses3d = []
    valid_2d_pred = []  # True if HR-net found a pose
    fx = []
    fy = []
    cx = []
    cy = []
    index = []

    calibs = mpii_3dhp.get_calibration_matrices()
    meta_data = muco_temp.get_metadata(v=v)

    for cam in range(11):
        gt = muco_temp.load_gt(cam, v=v)

        for vid in range(7):
            orig_shape = gt[vid][pose3d_key].shape  # (nFrames, nPoses, nJoints, 3)
            poses3d.append(_column_stack(gt[vid][pose3d_key]))

            kp = muco_temp.load_hrnet(cam, vid, v=v)
            poses2d.append(_column_stack(kp['poses']))
            valid_2d_pred.append(_column_stack(kp['is_valid']))

            assert len(poses3d[-1]) == len(poses2d[-1]), \
                "Gt and predicted frames are not aligned, cam:" + str(cam)

            orig_frame = np.tile(np.arange(orig_shape[0]).reshape(-1, 1), (1, orig_shape[1]))
            orig_pose = np.tile(np.arange(orig_shape[1]).reshape(1, -1), (orig_shape[0], 1))
            orig_frame = _column_stack(orig_frame)  # (nFrames*nPoses,)
            orig_pose = _column_stack(orig_pose)

            index.extend([('%d/%d/%d' % (cam, vid, orig_pose[i]), cam, vid, orig_frame[i], orig_pose[i])
                          for i in range(len(orig_frame))])

            for pose_ind in range(orig_shape[1]):
                sub, seq, _ = meta_data[cam][vid][pose_ind]
                calibration_mx = calibs[(sub, seq, cam)]
                fx.extend([calibration_mx[0, 0]] * orig_shape[0])
                fy.extend([calibration_mx[1, 1]] * orig_shape[0])
                cx.extend([calibration_mx[0, 2]] * orig_shape[0])
                cy.extend([calibration_mx[1, 2]] * orig_shape[0])

    self.poses2d = np.concatenate(poses2d)
    self.poses3d = np.concatenate(poses3d)
    self.valid_2d_pred = np.concatenate(valid_2d_pred)
    self.index = np.rec.array(index, dtype=[('seq', 'U12'), ('cam', 'int32'), ('vid', 'int32'),
                                            ('frame', 'int32'), ('pose', 'int32')])
    self.fx = np.array(fx, dtype='float32')
    self.fy = np.array(fy, dtype='float32')
    self.cx = np.array(cx, dtype='float32')
    self.cy = np.array(cy, dtype='float32')

    assert len(self.poses2d) == len(self.index), len(self.index)
    assert len(self.poses2d) == len(self.poses3d)
    assert len(self.poses2d) == len(self.valid_2d_pred), len(self.valid_2d_pred)
    assert len(self.poses2d) == len(self.fx), len(self.fx)
    assert len(self.poses2d) == len(self.fy), len(self.fy)
    assert len(self.poses2d) == len(self.cx), len(self.cx)
    assert len(self.poses2d) == len(self.cy), len(self.cy)
def __init__(self, pose2d_type, pose3d_scaling, cap_at_25fps, stride=1):
    assert pose2d_type == 'hrnet', "Only hrnet 2d is implemented"
    assert pose3d_scaling in ['normal', 'univ'], \
        "Unexpected pose3d scaling type: " + str(pose3d_scaling)

    self.transform = None
    pose3d_key = 'annot3' if pose3d_scaling == 'normal' else 'univ_annot3'

    poses2d = []
    poses3d = []
    valid_2d_pred = []  # True if HR-net found a pose
    fx = []
    fy = []
    cx = []
    cy = []
    index = []
    sequences = []

    calibs = mpii_3dhp.get_calibration_matrices()
    for sub in range(1, 9):      # S1, ..., S8
        for seq in range(1, 3):  # 2 sequences per subject
            gt = mpii_3dhp.train_ground_truth(sub, seq)
            for cam in range(11):
                # In S3/Seq2 cam2 there are some frames between 9400-9900 where the pose is
                # behind the camera/nearly in the camera plane. This breaks training.
                # For simplicity, ignore the whole sequence; ignoring only frames 9400-9900
                # would also work.
                if seq == 2 and sub == 3 and cam == 2:
                    continue

                # Find indices that are selected for the dataset
                inds = np.arange(len(gt[pose3d_key][cam]))
                if cap_at_25fps and mpii_3dhp.get_train_fps(sub, seq) == 50:
                    inds = inds[::2]
                inds = inds[::stride]
                num_frames = len(inds)

                poses3d.append(gt[pose3d_key][cam][inds])

                tmp = mpii_3dhp.train_poses_hrnet(sub, seq, cam)
                poses2d.append(tmp['poses'][inds])
                valid_2d_pred.append(tmp['is_valid'][inds])

                assert len(poses3d[-1]) == len(poses2d[-1]), \
                    "Gt and predicted frames are not aligned, seq:" + str(seq)

                seq_name = 'S%d/Seq%d/%d' % (sub, seq, cam)
                sequences.append(seq_name)
                index.extend([(seq_name, sub, seq, cam, i) for i in inds])

                calibration_mx = calibs[(sub, seq, cam)]
                fx.extend([calibration_mx[0, 0]] * num_frames)
                fy.extend([calibration_mx[1, 1]] * num_frames)
                cx.extend([calibration_mx[0, 2]] * num_frames)
                cy.extend([calibration_mx[1, 2]] * num_frames)

    self.pose2d_jointset = CocoExJoints()
    self.pose3d_jointset = MuPoTSJoints()

    self.poses2d = np.concatenate(poses2d)
    self.poses3d = np.concatenate(poses3d)
    self.valid_2d_pred = np.concatenate(valid_2d_pred)
    self.index = np.rec.array(index, dtype=[('seq', 'U12'), ('sub', 'int32'), ('subseq', 'int32'),
                                            ('cam', 'int32'), ('frame', 'int32')])
    self.fx = np.array(fx, dtype='float32')
    self.fy = np.array(fy, dtype='float32')
    self.cx = np.array(cx, dtype='float32')
    self.cy = np.array(cy, dtype='float32')
    self.sequences = sorted(sequences)

    assert len(self.poses2d) == len(self.index), len(self.index)
    assert len(self.poses2d) == len(self.poses3d)
    assert len(self.poses2d) == len(self.valid_2d_pred), len(self.valid_2d_pred)
    assert len(self.poses2d) == len(self.fx), len(self.fx)
    assert len(self.poses2d) == len(self.fy), len(self.fy)
    assert len(self.poses2d) == len(self.cx), len(self.cx)
    assert len(self.poses2d) == len(self.cy), len(self.cy)
def __init__(self, pose2d_type, pose3d_scaling, eval_frames_only=False):
    assert pose2d_type == 'hrnet', "Only hrnet 2d is implemented"
    assert pose3d_scaling in ['normal', 'univ'], \
        "Unexpected pose3d scaling type: " + str(pose3d_scaling)

    self.transform = None
    self.eval_frames_only = eval_frames_only
    pose3d_key = 'annot3' if pose3d_scaling == 'normal' else 'univ_annot3'

    poses2d = []
    poses3d = []
    valid_2d_pred = []  # True if HR-net found a pose
    valid_frame = []    # True if MPI-INF-3DHP marked the frame as valid
    fx = []
    fy = []
    cx = []
    cy = []
    width = []
    index = []

    for seq in range(1, 7):
        gt = h5py.File(os.path.join(mpii_3dhp.MPII_3DHP_PATH, 'mpi_inf_3dhp_test_set',
                                    'TS%d' % seq, 'annot_data.mat'), 'r')
        poses3d.append(gt[pose3d_key][:, 0])
        valid_frame.append(gt['valid_frame'][()] == 1)
        num_frames = len(poses3d[-1])  # The annotations are shorter than the number of images

        tmp = mpii_3dhp.test_poses_hrnet(seq)
        poses2d.append(tmp['poses'])
        valid_2d_pred.append(tmp['is_valid'])

        assert len(poses3d[-1]) == len(poses2d[-1]), \
            "Gt and predicted frames are not aligned, seq:" + str(seq)

        index.extend([(seq, i) for i in range(num_frames)])

        calibration_mx = mpii_3dhp.get_test_calib(seq)
        fx.extend([calibration_mx[0, 0]] * num_frames)
        fy.extend([calibration_mx[1, 1]] * num_frames)
        cx.extend([calibration_mx[0, 2]] * num_frames)
        cy.extend([calibration_mx[1, 2]] * num_frames)
        width.extend([2048 if seq < 5 else 1920] * num_frames)

    self.pose2d_jointset = CocoExJoints()
    self.pose3d_jointset = MuPoTSJoints()

    self.poses2d = np.concatenate(poses2d)
    self.poses3d = np.concatenate(poses3d)
    self.valid_2d_pred = np.concatenate(valid_2d_pred)

    valid_frame = np.concatenate(valid_frame)
    assert valid_frame.shape[1] == 1, valid_frame.shape
    valid_frame = valid_frame[:, 0]

    self.index = np.rec.array(index, dtype=[('seq', 'int32'), ('frame', 'int32')])
    self.fx = np.array(fx, dtype='float32')
    self.fy = np.array(fy, dtype='float32')
    self.cx = np.array(cx, dtype='float32')
    self.cy = np.array(cy, dtype='float32')
    self.width = np.array(width, dtype='int32')
    assert len(self.poses2d) == len(self.index), len(self.index)

    # keep only those frames where a pose was detected
    good_poses = self.valid_2d_pred.copy()
    if eval_frames_only:
        good_poses = good_poses & valid_frame
    self.good_poses = good_poses

    assert len(self.poses2d) == len(self.poses3d)
    assert len(self.poses2d) == len(self.valid_2d_pred), len(self.valid_2d_pred)
    assert len(self.poses2d) == len(self.fx), len(self.fx)
    assert len(self.poses2d) == len(self.fy), len(self.fy)
    assert len(self.poses2d) == len(self.cx), len(self.cx)
    assert len(self.poses2d) == len(self.cy), len(self.cy)
    assert len(self.poses2d) == len(self.width), len(self.width)
    assert len(self.poses2d) == len(self.good_poses), len(self.good_poses)
def __init__(self, pose2d_type, pose3d_scaling, pose_validity='detected_only', hip_threshold=-1):
    """
    Loads the MuPoTS dataset, but only those images where at least one person was detected.
    Each person on a frame is loaded separately.

    :param pose_validity: one of 'all', 'detected_only', 'valid_only'; specifies which poses are marked valid.
                          all - all of them; valid_only - those that are valid according to the GT annotations;
                          detected_only - those that were successfully detected by the 2D algorithm and are also valid
    :param hip_threshold: only those poses are loaded where the score of the hip is larger than this value
    :param filter_incorrect_match: MuPoTS's pose matching script has some erroneous matchings.
                                   If filter_incorrect_match is True, these are not loaded.
    """
    assert pose_validity in ['all', 'detected_only', 'valid_only']
    assert pose3d_scaling in ['univ', 'normal']

    self.pose2d_jointset = PersonStackedMuPoTsDataset.get_jointset(pose2d_type)
    self.pose3d_jointset = MuPoTSJoints()
    self.pose3d_scaling = pose3d_scaling
    pred2d_root_ind = self.pose2d_jointset.index_of('hip')

    poses2d = []
    poses3d = []
    joint3d_visible = []
    all_good_poses = []
    valid_annotations = []
    width = []
    index = []
    for seq in range(1, 21):
        img_width, img_height = mupots_3d.image_size(seq)

        gt = mupots_3d.load_gt_annotations(seq)
        pred2d = mupots_3d.load_2d_predictions(seq, pose2d_type)

        pose2d = pred2d['pose']
        pose3d = gt['annot3' if pose3d_scaling == 'normal' else 'univ_annot3']
        visibility = ~gt['occlusions']

        if pose_validity == 'all':
            good_poses = np.full(pose3d.shape[:2], True, dtype='bool')
        elif pose_validity == 'valid_only':
            good_poses = gt['isValidFrame'].squeeze()
        elif pose_validity == 'detected_only':
            good_poses = gt['isValidFrame'].squeeze()
            good_poses = np.logical_and(good_poses, pred2d['valid_pose'])
            good_poses = np.logical_and(good_poses, pose2d[:, :, pred2d_root_ind, 2] > hip_threshold)
        else:
            raise NotImplementedError("Unknown pose_validity value:" + pose_validity)

        orig_frame = np.tile(np.arange(len(good_poses)).reshape(-1, 1), (1, good_poses.shape[1]))
        orig_pose = np.tile(np.arange(good_poses.shape[1]).reshape(1, -1), (good_poses.shape[0], 1))

        assert pose2d.shape[:2] == good_poses.shape  # (nFrames, nPeople)
        assert pose3d.shape[:2] == good_poses.shape
        assert orig_frame.shape == good_poses.shape
        assert orig_pose.shape == good_poses.shape
        assert pose2d.shape[2:] == (self.pose2d_jointset.NUM_JOINTS, 3)
        assert pose3d.shape[2:] == (17, 3)
        assert visibility.shape[2] == 17
        assert good_poses.ndim == 2

        orig_frame = _column_stack(orig_frame)
        orig_pose = _column_stack(orig_pose)
        index.extend([('%d/%d' % (seq, orig_pose[i]), seq, orig_frame[i], orig_pose[i])
                      for i in range(len(orig_frame))])

        poses2d.append(_column_stack(pose2d))
        poses3d.append(_column_stack(pose3d))
        joint3d_visible.append(_column_stack(visibility))
        all_good_poses.append(_column_stack(good_poses))
        valid_annotations.append(_column_stack(gt['isValidFrame']))
        width.extend([img_width] * len(orig_frame))

    self.poses2d = np.concatenate(poses2d).astype('float32')
    self.poses3d = np.concatenate(poses3d).astype('float32')
    self.joint3d_visible = np.concatenate(joint3d_visible)
    self.good_poses = np.concatenate(all_good_poses)
    self.valid_annotations = np.concatenate(valid_annotations)
    self.width = np.array(width)
    self.index = np.rec.array(index, dtype=[('seq', 'U5'), ('seq_num', 'int32'),
                                            ('frame', 'int32'), ('pose', 'int32')])

    assert self.valid_annotations.shape == self.good_poses.shape
    assert len(self.valid_annotations) == len(self.poses2d)

    # Load calibration matrices
    N = len(self.poses2d)
    self.fx = np.zeros(N, dtype='float32')
    self.fy = np.zeros(N, dtype='float32')
    self.cx = np.zeros(N, dtype='float32')
    self.cy = np.zeros(N, dtype='float32')

    mupots_calibs = mupots_3d.get_calibration_matrices()
    for seq in range(1, 21):
        inds = (self.index.seq_num == seq)
        self.fx[inds] = mupots_calibs[seq][0, 0]
        self.fy[inds] = mupots_calibs[seq][1, 1]
        self.cx[inds] = mupots_calibs[seq][0, 2]
        self.cy[inds] = mupots_calibs[seq][1, 2]

    assert np.all(self.fx > 0), "Some fields were not filled"
    assert np.all(self.fy > 0), "Some fields were not filled"
    assert np.all(np.abs(self.cx) > 0), "Some fields were not filled"
    assert np.all(np.abs(self.cy) > 0), "Some fields were not filled"

    self.transform = None