def load_dataset() -> MocapDataset:
    """
    Load the dataset.
    Returns:
        dataset
    """
    print('Loading custom dataset...')
    if args.dataset.startswith('custom'):
        # The custom dataset is a set of 2D keypoints used to predict 3D keypoints
        from common.custom_dataset import CustomDataset
        dataset_ = CustomDataset('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz')
    else:
        raise KeyError('Invalid dataset')
    return dataset_
def load_dataset() -> MocapDataset:
    """
    Load the dataset.
    Returns:
        dataset
    """
    print('Loading dataset...')
    dataset_path = 'data/data_3d_' + args.dataset + '.npz'
    if args.dataset == 'h36m':
        # Human3.6M 3D keypoint dataset
        from common.h36m_dataset import Human36mDataset
        dataset_ = Human36mDataset(dataset_path)
    elif args.dataset.startswith('humaneva'):
        # HumanEva 3D keypoint dataset
        from common.humaneva_dataset import HumanEvaDataset
        dataset_ = HumanEvaDataset(dataset_path)
    elif args.dataset.startswith('custom'):
        # The custom dataset is a set of 2D keypoints used to predict 3D keypoints
        from common.custom_dataset import CustomDataset
        dataset_ = CustomDataset('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz')
    else:
        raise KeyError('Invalid dataset')
    return dataset_
    os.makedirs(args.checkpoint)
except OSError as e:
    if e.errno != errno.EEXIST:
        raise RuntimeError('Unable to create checkpoint directory:', args.checkpoint)

print('Loading dataset...')
dataset_path = 'data/data_3d_' + args.dataset + '.npz'
if args.dataset == 'h36m':
    from common.h36m_dataset import Human36mDataset
    dataset = Human36mDataset(dataset_path)
elif args.dataset.startswith('humaneva'):
    from common.humaneva_dataset import HumanEvaDataset
    dataset = HumanEvaDataset(dataset_path)
elif args.dataset.startswith('custom'):
    from common.custom_dataset import CustomDataset
    dataset = CustomDataset('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz')
else:
    raise KeyError('Invalid dataset')

print('Preparing data...')
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]

        if 'positions' in anim:
            positions_3d = []
            for cam in anim['cameras']:
                pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
                pos_3d[:, 1:] -= pos_3d[:, :1]  # Remove global offset, but keep trajectory in first position
                positions_3d.append(pos_3d)
            anim['positions_3d'] = positions_3d
from common.camera import *
from common.model import *
from common.loss import *
from common.generators import ChunkedGenerator, UnchunkedGenerator
from time import time
from common.utils import deterministic_random

args = parse_args()
print(args)

print('Loading dataset...')
dataset_path = 'data/data_3d_' + args.dataset + '.npz'
dataset_path = '/home/filipkr/Documents/xjob/data_2d_custom_lol-take2.npz'
pose_path = '/home/filipkr/Documents/xjob/custom_2d_training.npz'
dataset = CustomDataset(dataset_path)
data = np.load(pose_path, allow_pickle=True)
data = data['data'].item()

print('Loading 2D detections...')
keypoints_metadata = np.load(dataset_path, allow_pickle=True)['metadata'].item()
# keypoints = np.load('data/data_2d_' + args.dataset + '_' +
#                     args.keypoints + '.npz', allow_pickle=True)
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
joints_left, joints_right = list(dataset.skeleton().joints_left()), list(
    dataset.skeleton().joints_right())
# keypoints = keypoints['positions_2d'].item()
keypoints = np.load(pose_path, allow_pickle=True)
except OSError as e:
    if e.errno != errno.EEXIST:
        raise RuntimeError('Unable to create checkpoint directory:', args.checkpoint)

print('Loading dataset...')
dataset_path = '../VideoPose3D-master/data/data_3d_' + args.dataset + '.npz'
if args.dataset == 'h36m':
    from common.h36m_dataset import Human36mDataset
    dataset = Human36mDataset(dataset_path)
elif args.dataset.startswith('humaneva'):
    from common.humaneva_dataset import HumanEvaDataset
    dataset = HumanEvaDataset(dataset_path)
elif args.dataset.startswith('custom'):
    from common.custom_dataset import CustomDataset  # modified
    dataset = CustomDataset('../VideoPose3D-master/data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz')
else:
    raise KeyError('Invalid dataset')

print('Preparing data...')
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]

        if 'positions' in anim:
            positions_3d = []
            for cam in anim['cameras']:
                pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
                pos_3d[:, 1:] -= pos_3d[:, :1]  # Remove global offset, but keep trajectory in first position
                positions_3d.append(pos_3d)
            anim['positions_3d'] = positions_3d
except OSError as e:
    if e.errno != errno.EEXIST:
        raise RuntimeError('Unable to create checkpoint directory:', args.checkpoint)

print('Loading dataset...')
dataset_path = 'data/data_3d_' + args.dataset + '.npz'
if args.dataset == 'h36m':
    from common.h36m_dataset import Human36mDataset
    dataset = Human36mDataset(dataset_path)
elif args.dataset.startswith('humaneva'):
    from common.humaneva_dataset import HumanEvaDataset
    dataset = HumanEvaDataset(dataset_path)
elif args.dataset.startswith('custom'):
    from common.custom_dataset import CustomDataset
    dataset = CustomDataset(args.keypoints + '.npz')
else:
    raise KeyError('Invalid dataset')

print('Preparing data...')
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]

        if 'positions' in anim:
            positions_3d = []
            for cam in anim['cameras']:
                pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
                pos_3d[:,
except OSError as e:
    if e.errno != errno.EEXIST:
        raise RuntimeError('Unable to create checkpoint directory:', args.checkpoint)

print('Loading dataset...')
dataset_path = 'data/data_3d_' + args.dataset + '.npz'
if args.dataset == 'h36m':
    from common.h36m_dataset import Human36mDataset
    dataset = Human36mDataset(dataset_path)
elif args.dataset.startswith('humaneva'):
    from common.humaneva_dataset import HumanEvaDataset
    dataset = HumanEvaDataset(dataset_path)
elif args.dataset.startswith('custom'):
    # bgnote - this is where we are
    from common.custom_dataset import CustomDataset
    dataset = CustomDataset(f"{args.keypoints}")
else:
    raise KeyError('Invalid dataset')

print('Loading 2D detections...')
keypoints = np.load(args.keypoints, allow_pickle=True)
keypoints_metadata = keypoints['metadata'].item()
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
joints_left, joints_right = [4, 5, 6, 11, 12, 13], [1, 2, 3, 14, 15, 16]
keypoints = keypoints['positions_2d'].item()

for subject in keypoints.keys():
    for action in keypoints[subject]:
        for cam_idx, kps in enumerate(keypoints[subject][action]):
            # Normalize camera frame
print(args)

try:
    # Create checkpoint directory if it does not exist
    os.makedirs(args.checkpoint)
except OSError as e:
    if e.errno != errno.EEXIST:
        raise RuntimeError(
            'Unable to create checkpoint directory:', args.checkpoint)

print('ARGS EVAL:::::', args.evaluate)
print('Loading dataset...')
dataset_path = 'data/data_3d_' + args.dataset + '.npz'
dataset = CustomDataset('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz')
# print(dataset.subjects())
print(dataset['009_FL_R.MTS'])
dataset_poses = np.load('data/data_2d_' + args.dataset + '_' +
                        args.keypoints + '.npz', allow_pickle=True)

print('Preparing data...')
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]
        print(anim)

        if 'positions' in anim:
            positions_3d = []
            for cam in anim['cameras']:
                pos_3d = world_to_camera(
# try:
#     # Create checkpoint directory if it does not exist
#     os.makedirs(args.checkpoint)
# except OSError as e:
#     if e.errno != errno.EEXIST:
#         raise RuntimeError(
#             'Unable to create checkpoint directory:', args.checkpoint)

print('Loading dataset...')
dataset_path = 'data/data_3d_' + args.dataset + '.npz'
dataset_path = '/home/filipkr/Documents/xjob/data_2d_custom_lol-take2.npz'
pose_path = '/home/filipkr/Documents/xjob/custom_2d_training.npz'
# dataset = CustomDataset('data/data_2d_' + args.dataset +
#                         '_' + args.keypoints + '.npz')
dataset = CustomDataset(dataset_path)
# print(dataset.subjects())
# print(dataset['009_FL_R.MTS'])
# dataset_poses = np.load('data/data_2d_' + args.dataset + '_' +
#                         args.keypoints + '.npz', allow_pickle=True)
data = np.load(pose_path, allow_pickle=True)
data = data['data'].item()

print('Preparing data...')
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]
        print(anim)

        if 'positions' in anim:
from common.camera import *
from common.custom_dataset import CustomDataset
from common.generators import UnchunkedGenerator
from common.loss import *
from common.model import TemporalModel
from common.utils import deterministic_random
from common.visualization import render_animation

custom_dataset = '/home/filip/Documents/VideoPose3D-master/data/data_2d_custom_baseball_george_2.npz'
output_path = "/home/filip/Documents/VideoPose3D-master/baseball_george_1.mp4"
chk_filename = '/home/filip/Documents/VideoPose3D-master/checkpoint/Model_3D.bin'
input_video_path = '/home/filip/Documents/VideoPose3D-master/inputs/baseball_george_2.mp4'
export_path = None

dataset = CustomDataset(custom_dataset)

print('Loading 2D detections...')
keypoints = np.load(custom_dataset, allow_pickle=True)
keypoints_metadata = keypoints['metadata'].item()
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
joints_left, joints_right = list(dataset.skeleton().joints_left()), list(
    dataset.skeleton().joints_right())
keypoints = keypoints['positions_2d'].item()

for subject in keypoints.keys():
    for action in keypoints[subject]:
        for cam_idx, kps in enumerate(keypoints[subject][action]):
            # Normalize camera frame
            cam = dataset.cameras()[subject][cam_idx]
            kps[..., :2] = normalize_screen_coordinates(kps[..., :2],
    os.makedirs(args.checkpoint)
except OSError as e:
    if e.errno != errno.EEXIST:
        raise RuntimeError('Unable to create checkpoint directory:', args.checkpoint)

print('Loading dataset...')
dataset_path = 'data/data_3d_' + args.dataset + '.npz'
if args.dataset == 'h36m':
    from common.h36m_dataset import Human36mDataset
    dataset = Human36mDataset(dataset_path)
elif args.dataset.startswith('custom'):
    from common.custom_dataset import CustomDataset
    dataset = CustomDataset('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz')
else:
    raise KeyError('Invalid dataset')

print('Preparing data...')
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]

        if 'positions' in anim:
            positions_3d = []
            for cam in anim['cameras']:
                pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
                pos_3d[:,
def the_main_kaboose(args):
    print(args)

    try:
        # Create checkpoint directory if it does not exist
        os.makedirs(args.checkpoint)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise RuntimeError('Unable to create checkpoint directory:', args.checkpoint)

    print('Loading dataset...')
    dataset_path = 'data/data_3d_' + args.dataset + '.npz'
    if args.dataset == 'h36m':
        from common.h36m_dataset import Human36mDataset
        dataset = Human36mDataset(dataset_path)
    elif args.dataset.startswith('humaneva'):
        from common.humaneva_dataset import HumanEvaDataset
        dataset = HumanEvaDataset(dataset_path)
    elif args.dataset.startswith('custom'):
        from common.custom_dataset import CustomDataset
        dataset = CustomDataset('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz')
    else:
        raise KeyError('Invalid dataset')

    print('Preparing data...')
    for subject in dataset.subjects():
        for action in dataset[subject].keys():
            anim = dataset[subject][action]

            # this only works when training.
            if 'positions' in anim:
                positions_3d = []
                for cam in anim['cameras']:
                    pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
                    pos_3d[:, 1:] -= pos_3d[:, :1]  # Remove global offset, but keep trajectory in first position
                    positions_3d.append(pos_3d)
                anim['positions_3d'] = positions_3d

    print('Loading 2D detections...')
    keypoints = np.load('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz', allow_pickle=True)
    keypoints_metadata = keypoints['metadata'].item()
    keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
    kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
    joints_left, joints_right = list(dataset.skeleton().joints_left()), list(dataset.skeleton().joints_right())
    keypoints = keypoints['positions_2d'].item()

    # THIS IS ABOUT TRAINING. ignore pls.
    for subject in dataset.subjects():
        assert subject in keypoints, 'Subject {} is missing from the 2D detections dataset'.format(subject)
        for action in dataset[subject].keys():
            assert action in keypoints[subject], 'Action {} of subject {} is missing from the 2D detections dataset'.format(action, subject)
            if 'positions_3d' not in dataset[subject][action]:
                continue

            for cam_idx in range(len(keypoints[subject][action])):
                # We check for >= instead of == because some videos in H3.6M contain extra frames
                mocap_length = dataset[subject][action]['positions_3d'][cam_idx].shape[0]
                assert keypoints[subject][action][cam_idx].shape[0] >= mocap_length

                if keypoints[subject][action][cam_idx].shape[0] > mocap_length:
                    # Shorten sequence
                    keypoints[subject][action][cam_idx] = keypoints[subject][action][cam_idx][:mocap_length]

            assert len(keypoints[subject][action]) == len(dataset[subject][action]['positions_3d'])

    # normalize camera frame?
    for subject in keypoints.keys():
        for action in keypoints[subject]:
            for cam_idx, kps in enumerate(keypoints[subject][action]):
                # Normalize camera frame
                cam = dataset.cameras()[subject][cam_idx]
                kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=cam['res_w'], h=cam['res_h'])
                keypoints[subject][action][cam_idx] = kps

    subjects_train = args.subjects_train.split(',')
    subjects_semi = [] if not args.subjects_unlabeled else args.subjects_unlabeled.split(',')
    if not args.render:
        subjects_test = args.subjects_test.split(',')
    else:
        subjects_test = [args.viz_subject]

    semi_supervised = len(subjects_semi) > 0
    if semi_supervised and not dataset.supports_semi_supervised():
        raise RuntimeError('Semi-supervised training is not implemented for this dataset')

    def fetch(subjects, action_filter=None, subset=1, parse_3d_poses=True):
        out_poses_3d = []
        out_poses_2d = []
        out_camera_params = []
        for subject in subjects:
            print("gonna check actions for subject " + subject)
        for subject in subjects:
            for action in keypoints[subject].keys():
                if action_filter is not None:
                    found = False
                    for a in action_filter:
                        if action.startswith(a):
                            found = True
                            break
                    if not found:
                        continue

                poses_2d = keypoints[subject][action]
                for i in range(len(poses_2d)):  # Iterate across cameras
                    out_poses_2d.append(poses_2d[i])

                if subject in dataset.cameras():
                    cams = dataset.cameras()[subject]
                    assert len(cams) == len(poses_2d), 'Camera count mismatch'
                    for cam in cams:
                        if 'intrinsic' in cam:
                            out_camera_params.append(cam['intrinsic'])

                if parse_3d_poses and 'positions_3d' in dataset[subject][action]:
                    poses_3d = dataset[subject][action]['positions_3d']
                    assert len(poses_3d) == len(poses_2d), 'Camera count mismatch'
                    for i in range(len(poses_3d)):  # Iterate across cameras
                        out_poses_3d.append(poses_3d[i])

        if len(out_camera_params) == 0:
            out_camera_params = None
        if len(out_poses_3d) == 0:
            out_poses_3d = None

        stride = args.downsample
        if subset < 1:
            for i in range(len(out_poses_2d)):
                n_frames = int(round(len(out_poses_2d[i]) // stride * subset) * stride)
                start = deterministic_random(0, len(out_poses_2d[i]) - n_frames + 1, str(len(out_poses_2d[i])))
                out_poses_2d[i] = out_poses_2d[i][start:start + n_frames:stride]
                if out_poses_3d is not None:
                    out_poses_3d[i] = out_poses_3d[i][start:start + n_frames:stride]
        elif stride > 1:
            # Downsample as requested
            for i in range(len(out_poses_2d)):
                out_poses_2d[i] = out_poses_2d[i][::stride]
                if out_poses_3d is not None:
                    out_poses_3d[i] = out_poses_3d[i][::stride]

        return out_camera_params, out_poses_3d, out_poses_2d

    action_filter = None if args.actions == '*' else args.actions.split(',')
    if action_filter is not None:
        print('Selected actions:', action_filter)

    # when you run inference, this returns None, None, and the keypoints array renamed as poses_valid_2d
    cameras_valid, poses_valid, poses_valid_2d = fetch(subjects_test, action_filter)

    filter_widths = [int(x) for x in args.architecture.split(',')]
    if not args.disable_optimizations and not args.dense and args.stride == 1:
        # Use optimized model for single-frame predictions
        shape_2 = poses_valid_2d[0].shape[-2]
        shape_1 = poses_valid_2d[0].shape[-1]
        numJoints = dataset.skeleton().num_joints()
        model_pos_train = TemporalModelOptimized1f(shape_2, shape_1, numJoints,
                                                   filter_widths=filter_widths, causal=args.causal,
                                                   dropout=args.dropout, channels=args.channels)
    else:
        # When incompatible settings are detected (stride > 1, dense filters, or disabled optimization),
        # fall back to normal model
        model_pos_train = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1],
                                        dataset.skeleton().num_joints(),
                                        filter_widths=filter_widths, causal=args.causal,
                                        dropout=args.dropout, channels=args.channels,
                                        dense=args.dense)

    model_pos = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1],
                              dataset.skeleton().num_joints(),
                              filter_widths=filter_widths, causal=args.causal,
                              dropout=args.dropout, channels=args.channels,
                              dense=args.dense)

    receptive_field = model_pos.receptive_field()
    print('INFO: Receptive field: {} frames'.format(receptive_field))
    pad = (receptive_field - 1) // 2  # Padding on each side
    if args.causal:
        print('INFO: Using causal convolutions')
        causal_shift = pad
    else:
        causal_shift = 0

    model_params = 0
    for parameter in model_pos.parameters():
        model_params += parameter.numel()
    print('INFO: Trainable parameter count:', model_params)

    if torch.cuda.is_available():
        model_pos = model_pos.cuda()
        model_pos_train = model_pos_train.cuda()

    if args.resume or args.evaluate:
        chk_filename = os.path.join(args.checkpoint, args.resume if args.resume else args.evaluate)
        print('Loading checkpoint', chk_filename)
        checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
        print('This model was trained for {} epochs'.format(checkpoint['epoch']))
        model_pos_train.load_state_dict(checkpoint['model_pos'])
        model_pos.load_state_dict(checkpoint['model_pos'])

        if args.evaluate and 'model_traj' in checkpoint:
            # Load trajectory model if it is contained in the checkpoint (e.g. for inference in the wild)
            model_traj = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1], 1,
                                       filter_widths=filter_widths, causal=args.causal,
                                       dropout=args.dropout, channels=args.channels,
                                       dense=args.dense)
            if torch.cuda.is_available():
                model_traj = model_traj.cuda()
            model_traj.load_state_dict(checkpoint['model_traj'])
        else:
            model_traj = None

    test_generator = UnchunkedGenerator(cameras_valid, poses_valid, poses_valid_2d,
                                        pad=pad, causal_shift=causal_shift, augment=False,
                                        kps_left=kps_left, kps_right=kps_right,
                                        joints_left=joints_left, joints_right=joints_right)
    print('INFO: Testing on {} frames'.format(test_generator.num_frames()))

    # Evaluate
    def evaluate(eval_generator, action=None, return_predictions=False, use_trajectory_model=False):
        epoch_loss_3d_pos = 0
        epoch_loss_3d_pos_procrustes = 0
        epoch_loss_3d_pos_scale = 0
        epoch_loss_3d_vel = 0
        with torch.no_grad():
            if not use_trajectory_model:
                model_pos.eval()
            else:
                model_traj.eval()
            N = 0
            for _, batch, batch_2d in eval_generator.next_epoch():
                inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
                if torch.cuda.is_available():
                    inputs_2d = inputs_2d.cuda()

                # Positional model
                if not use_trajectory_model:
                    predicted_3d_pos = model_pos(inputs_2d)
                else:
                    predicted_3d_pos = model_traj(inputs_2d)

                # Test-time augmentation (if enabled)
                if eval_generator.augment_enabled():
                    # Undo flipping and take average with non-flipped version
                    predicted_3d_pos[1, :, :, 0] *= -1
                    if not use_trajectory_model:
                        predicted_3d_pos[1, :, joints_left + joints_right] = predicted_3d_pos[1, :, joints_right + joints_left]
                    predicted_3d_pos = torch.mean(predicted_3d_pos, dim=0, keepdim=True)

                if return_predictions:
                    return predicted_3d_pos.squeeze(0).cpu().numpy()

                inputs_3d = torch.from_numpy(batch.astype('float32'))
                if torch.cuda.is_available():
                    inputs_3d = inputs_3d.cuda()
                inputs_3d[:, :, 0] = 0
                if eval_generator.augment_enabled():
                    inputs_3d = inputs_3d[:1]

                error = mpjpe(predicted_3d_pos, inputs_3d)
                epoch_loss_3d_pos_scale += inputs_3d.shape[0] * inputs_3d.shape[1] * n_mpjpe(predicted_3d_pos, inputs_3d).item()

                epoch_loss_3d_pos += inputs_3d.shape[0] * inputs_3d.shape[1] * error.item()
                N += inputs_3d.shape[0] * inputs_3d.shape[1]

                inputs = inputs_3d.cpu().numpy().reshape(-1, inputs_3d.shape[-2], inputs_3d.shape[-1])
                predicted_3d_pos = predicted_3d_pos.cpu().numpy().reshape(-1, inputs_3d.shape[-2], inputs_3d.shape[-1])

                epoch_loss_3d_pos_procrustes += inputs_3d.shape[0] * inputs_3d.shape[1] * p_mpjpe(predicted_3d_pos, inputs)

                # Compute velocity error
                epoch_loss_3d_vel += inputs_3d.shape[0] * inputs_3d.shape[1] * mean_velocity_error(predicted_3d_pos, inputs)

        if action is None:
            print('----------')
        else:
            print('----' + action + '----')
        e1 = (epoch_loss_3d_pos / N) * 1000
        e2 = (epoch_loss_3d_pos_procrustes / N) * 1000
        e3 = (epoch_loss_3d_pos_scale / N) * 1000
        ev = (epoch_loss_3d_vel / N) * 1000
        print('Test time augmentation:', eval_generator.augment_enabled())
        print('Protocol #1 Error (MPJPE):', e1, 'mm')
        print('Protocol #2 Error (P-MPJPE):', e2, 'mm')
        print('Protocol #3 Error (N-MPJPE):', e3, 'mm')
        print('Velocity Error (MPJVE):', ev, 'mm')
        print('----------')

        return e1, e2, e3, ev

    if args.render:
        print('Rendering...')

        input_keypoints = keypoints[args.viz_subject][args.viz_action][args.viz_camera].copy()
        ground_truth = None
        if args.viz_subject in dataset.subjects() and args.viz_action in dataset[args.viz_subject]:
            if 'positions_3d' in dataset[args.viz_subject][args.viz_action]:
                ground_truth = dataset[args.viz_subject][args.viz_action]['positions_3d'][args.viz_camera].copy()
        if ground_truth is None:
            print('INFO: this action is unlabeled. Ground truth will not be rendered.')

        gen = UnchunkedGenerator(None, None, [input_keypoints],
                                 pad=pad, causal_shift=causal_shift, augment=args.test_time_augmentation,
                                 kps_left=kps_left, kps_right=kps_right,
                                 joints_left=joints_left, joints_right=joints_right)
        prediction = evaluate(gen, return_predictions=True)
        if model_traj is not None and ground_truth is None:
            prediction_traj = evaluate(gen, return_predictions=True, use_trajectory_model=True)
            prediction += prediction_traj

        if args.viz_export is not None:
            print('Exporting joint positions to', args.viz_export)
            # Predictions are in camera space
            np.save(args.viz_export, prediction)

        if args.viz_output is not None:
            if ground_truth is not None:
                # Reapply trajectory
                trajectory = ground_truth[:, :1]
                ground_truth[:, 1:] += trajectory
                prediction += trajectory

            # Invert camera transformation
            cam = dataset.cameras()[args.viz_subject][args.viz_camera]
            if ground_truth is not None:
                prediction = camera_to_world(prediction, R=cam['orientation'], t=cam['translation'])
                ground_truth = camera_to_world(ground_truth, R=cam['orientation'], t=cam['translation'])
            else:
                # If the ground truth is not available, take the camera extrinsic params from a random subject.
                # They are almost the same, and anyway, we only need this for visualization purposes.
                for subject in dataset.cameras():
                    if 'orientation' in dataset.cameras()[subject][args.viz_camera]:
                        rot = dataset.cameras()[subject][args.viz_camera]['orientation']
                        break
                prediction = camera_to_world(prediction, R=rot, t=0)
                # We don't have the trajectory, but at least we can rebase the height
                prediction[:, :, 2] -= np.min(prediction[:, :, 2])

            anim_output = {'Reconstruction': prediction}
            if ground_truth is not None and not args.viz_no_ground_truth:
                anim_output['Ground truth'] = ground_truth

            input_keypoints = image_coordinates(input_keypoints[..., :2], w=cam['res_w'], h=cam['res_h'])

            print("Writing to json")
            import json

            # format the data in the same format as mediapipe, so we can load it in unity with the same script
            # we need a list (frames) of lists of 3d landmarks.
            # but prediction[] only has 17 landmarks, and we need 25 in our unity script
            unity_landmarks = prediction.tolist()

            with open(args.output_json, "w") as json_file:
                json.dump(unity_landmarks, json_file)

            if args.rendervideo == "yes":
                from common.visualization import render_animation
                render_animation(input_keypoints, keypoints_metadata, anim_output,
                                 dataset.skeleton(), dataset.fps(), args.viz_bitrate, cam['azimuth'],
                                 args.viz_output, limit=args.viz_limit, downsample=args.viz_downsample,
                                 size=args.viz_size, input_video_path=args.viz_video,
                                 viewport=(cam['res_w'], cam['res_h']),
                                 input_video_skip=args.viz_skip)
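The entry point of this fork is not shown in the snippets above; as a minimal sketch (assuming the standard parse_args() from common.arguments, which the other snippets also call), the function would be driven like this:

if __name__ == '__main__':
    # Hypothetical driver, not part of the original file: parse the CLI flags referenced
    # inside the_main_kaboose (args.dataset, args.keypoints, args.render, args.viz_* ...)
    # and run the full pipeline.
    args = parse_args()
    the_main_kaboose(args)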
class Predictor:
    def __init__(self, dataset_path, checkpoint_path, input_video_path=None,
                 export_path=None, output_path=None, with_cuda=False):
        self.with_cuda = with_cuda
        self.dataset_path = dataset_path
        self.export_path = export_path
        self.output_path = output_path
        self.input_video_path = input_video_path
        self.dataset = CustomDataset(self.dataset_path)
        self.keypoints = None
        self.keypoints_left = None
        self.keypoints_right = None
        self.joints_left = None
        self.joints_right = None
        self.checkpoint = torch.load(checkpoint_path,
                                     map_location=lambda storage, loc: storage)
        self.model = None
        self.init_keypoints()
        self.valid_poses = self.keypoints["detectron2"]["custom"]
        self.init_model()
        self.test_generator = None
        self.init_generator()
        self.prediction = None
        self.make_prediction()

    def export_prediction(self):
        if self.export_path is not None:
            np.save(self.export_path, self.prediction)

    def init_model(self):
        self.model = TemporalModel(self.valid_poses[0].shape[-2],
                                   self.valid_poses[0].shape[-1],
                                   self.dataset.skeleton().num_joints(),
                                   filter_widths=[3, 3, 3, 3, 3], causal=False,
                                   dropout=0.25, channels=1024, dense=False)
        self.model.load_state_dict(self.checkpoint['model_pos'])

    def init_keypoints(self):
        self.keypoints = np.load(self.dataset_path, allow_pickle=True)
        keypoints_metadata = self.keypoints['metadata'].item()
        keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
        self.keypoints_left, self.keypoints_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
        self.joints_left, self.joints_right = list(self.dataset.skeleton().joints_left()), list(self.dataset.skeleton().joints_right())
        self.keypoints = self.keypoints['positions_2d'].item()

        for subject in self.keypoints.keys():
            for action in self.keypoints[subject]:
                for cam_idx, kps in enumerate(self.keypoints[subject][action]):
                    # Normalize camera frame
                    cam = self.dataset.cameras()[subject][cam_idx]
                    kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=cam['res_w'], h=cam['res_h'])
                    self.keypoints[subject][action][cam_idx] = kps

    def init_generator(self):
        receptive_field = self.model.receptive_field()
        pad = (receptive_field - 1) // 2
        causal_shift = 0
        self.test_generator = UnchunkedGenerator(None, None, self.valid_poses,
                                                 pad=pad, causal_shift=causal_shift,
                                                 augment=False,
                                                 kps_left=self.keypoints_left,
                                                 kps_right=self.keypoints_right,
                                                 joints_left=self.joints_left,
                                                 joints_right=self.joints_right)

    def make_prediction(self):
        if self.with_cuda:
            self.model = self.model.cuda()
        with torch.no_grad():
            self.model.eval()
            for _, batch, batch_2d in self.test_generator.next_epoch():
                inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
                if self.with_cuda:
                    inputs_2d = inputs_2d.cuda()

                predicted_3d_pos = self.model(inputs_2d)

                if self.test_generator.augment_enabled():
                    predicted_3d_pos[1, :, :, 0] *= -1
                    predicted_3d_pos[1, :, self.joints_left + self.joints_right] = predicted_3d_pos[1, :, self.joints_right + self.joints_left]
                    predicted_3d_pos = torch.mean(predicted_3d_pos, dim=0, keepdim=True)

                predicted_3d_pos = predicted_3d_pos.squeeze(0).cpu().numpy()
                rot = self.dataset.cameras()['detectron2'][0]['orientation']
                predicted_3d_pos = camera_to_world(predicted_3d_pos, R=rot, t=0)
                predicted_3d_pos[:, :, 2] -= np.min(predicted_3d_pos[:, :, 2])
                self.prediction = predicted_3d_pos

    def plot_pose(self, pose_index=0):
        pose = make_pose(self.prediction.tolist()[pose_index])
        pose.prepare_plot()
        pose.plot()
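A minimal usage sketch for the Predictor class above, assuming a 2D-keypoint archive whose subject key is 'detectron2' (as the class expects) and a pretrained checkpoint; the paths are placeholders, not from the original sources:

# Hypothetical example, not part of the original repository.
predictor = Predictor(
    dataset_path='data/data_2d_custom_myvideo.npz',                   # assumed keypoint archive
    checkpoint_path='checkpoint/pretrained_h36m_detectron_coco.bin',  # assumed checkpoint file
    export_path='output_3d.npy',
    with_cuda=torch.cuda.is_available(),
)
# The constructor runs the whole pipeline, so the 3D poses are available immediately.
print(predictor.prediction.shape)   # (n_frames, n_joints, 3) in world coordinates
predictor.export_prediction()       # writes the prediction to export_path with np.save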
filter_widths = [int(x) for x in args.architecture.split(',')]
model_pos = TemporalModel(17, 2, 17,
                          filter_widths=filter_widths, causal=args.causal,
                          dropout=args.dropout, channels=args.channels,
                          dense=args.dense)
receptive_field = model_pos.receptive_field()
print('INFO: Receptive field: {} frames'.format(receptive_field))
pad = (receptive_field - 1) // 2  # Padding on each side
print(pad)

dataset = CustomDataset('/home/kjakkala/VideoPose3D/tmp.npz')
joints_left, joints_right = list(dataset.skeleton().joints_left()), list(
    dataset.skeleton().joints_right())
print(joints_left, joints_right)

print('Loading 2D detections...')
keypoints = np.load('/home/kjakkala/tmp.npz', allow_pickle=True)
keypoints_metadata = keypoints['metadata'].item()
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
joints_left, joints_right = list(dataset.skeleton().joints_left()), list(
    dataset.skeleton().joints_right())
print(joints_left, joints_right)
keypoints = keypoints['positions_2d'].item()

for subject in keypoints.keys():
# get path to dataset
print('Loading dataset...')
dataset_path = 'data/data_3d_' + args.dataset + '.npz'

# don't use ellipsis to truncate arrays when printing
# np.set_printoptions(threshold=sys.maxsize)

# dataset init
from common.custom_dataset import CustomDataset

# check path of npz
print('PATH: outs/data_2d_' + args.dataset + '_' + args.keypoints + '.npz')

# create new CustomDataset object
dataset = CustomDataset('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz')  # NOTE CHANGE

print('Preparing data...')
print(dataset.subjects())  # looks like dict_keys(['../vids/output.mp4'])

for subject in dataset.subjects():  # should have just one subject, which will be '../vids/output.mp4'
    print(dataset[subject])
    '''looks like {'custom': {'cameras': {'id': '../vids/output.mp4', 'res_w': 1080, 'res_h': 1920,
    'azimuth': 70, 'orientation': array([ 0.14070565, -0.15007018, -0.7552408 ,  0.62232804], dtype=float32),
    'translation': array([1.841107 , 4.9552846, 1.5634454], dtype=float32)}}}'''
    print(dataset[subject].keys())  # something like dict_keys(['custom'])
    # should just be one key 'custom'
causal = False
viz_no_ground_truth = True  # do not show ground-truth poses
viz_bitrate = 3000
viz_limit = -1
viz_downsample = 1
viz_size = 5
viz_skip = 0
viz_fps = 30
resume = ''
evaluate = 'pretrained_h36m_detectron_coco.bin'
checkpoint = 'checkpoint'

dataset = CustomDataset(myvideos_path)
# print(dataset)
# print(dataset.cameras())
# print(dataset.fps())
# print(dataset.skeleton())
# print(dataset.subjects())

print('Preparing data...')
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]

        if 'positions' in anim:
            positions_3d = []
            for cam in anim['cameras']: