def load_2d_predictions(sequence, detector):
    """Loads precreated 2D pose predictions, matched with the GT poses.

    :param sequence: sequence identifier, decoded via ``_decode_sequence``
    :param detector: 'openpose' or 'hrnet' — selects which prediction set to load
    """
    assert detector in ['openpose', 'hrnet']
    seq_name = _decode_sequence(sequence)
    # hrnet predictions live in a separate, GT-matched folder
    subfolder = "openpose" if detector == 'openpose' else "hrnet_matched"
    return load(os.path.join('data', subfolder, seq_name + '.pkl'),
                pkl_py2_comp=True)
def __init__(self, img_folder, metadata, poses_path, depth_folder):
    """
    Builds a per-pose dataset from a folder of images, 2D keypoint predictions
    and precomputed jointwise depth maps.

    :param img_folder: folder containing the input images
    :param metadata: CSV file; first row is a header, each later row is
                     ``image_name, fx, fy, cx, cy`` (camera intrinsics per image)
    :param poses_path: pickle with 2D keypoint detections, keyed by image name
    :param depth_folder: folder of ``<image>.npy`` depth maps
    """
    self.transform = None
    self.images = sorted(os.listdir(img_folder))

    # Load camera parameters
    with open(metadata, 'r') as f:
        data = f.readlines()
    data = [x.split(',') for x in data]
    data = [[y.strip() for y in x] for x in data]
    # skip the header row; map image name -> [fx, fy, cx, cy]
    camera_params = {x[0]: [float(y) for y in x[1:]] for x in data[1:]}

    # Prepare data
    poses2d = []
    fx = []
    fy = []
    cx = []
    cy = []
    img_names = []
    jointwise_depth = []
    pred2d = load(poses_path)
    for image in self.images:
        poses = [np.array(x['keypoints']).reshape((17, 3))
                 for x in pred2d[image]]
        poses = np.stack(poses, axis=0)  # (nPoses, 17, 3)
        poses = extend_hrnet_raw(poses)  # (nPoses, 19, 3)

        img = cv2.imread(os.path.join(img_folder, image))
        width, height = recommended_size(img.shape)

        # sample the depth map at every predicted 2D joint location
        depth = load(os.path.join(depth_folder, image + '.npy'))
        depth = depth_from_coords(depth,
                                  poses.reshape((1, -1, 3))[:, :, :2],
                                  width, height)  # (nFrames(=1), nPoses*19)
        depth = depth.reshape((-1, 19))  # (nPoses, 19)

        jointwise_depth.append(depth)
        poses2d.append(poses)
        # replicate the per-image intrinsics for every pose on that image
        for i, field in enumerate([fx, fy, cx, cy]):
            field.extend([camera_params[image][i]] * len(poses))
        img_names.extend([image] * len(poses))

    self.poses2d = np.concatenate(poses2d).astype('float32')
    # dummy 3D poses — only the first 17 joints, values are placeholders
    self.poses3d = np.ones_like(self.poses2d)[:, :17]
    self.fx = np.array(fx, dtype='float32')
    self.fy = np.array(fy, dtype='float32')
    self.cx = np.array(cx, dtype='float32')
    self.cy = np.array(cy, dtype='float32')
    self.img_names = np.array(img_names)
    self.pred_cdepths = np.concatenate(jointwise_depth).astype('float32')

    self.pose2d_jointset = CocoExJoints()
    self.pose3d_jointset = MuPoTSJoints()
def load_model(model_name):
    """Loads a trained depth-pose network and its config from ``LOG_PATH/<model_name>``.

    :param model_name: subfolder of ``LOG_PATH`` holding ``config.json`` and
                       ``model_params.pkl``
    :return: ``(config, model)`` — the parsed config dict and the model in
             eval mode on the GPU; for combined models only the pose subnet
             is returned
    :raises NotImplementedError: for 'depthpose_offset' (notebook-only) and
                                 for unknown model names
    """
    config = load(os.path.join(LOG_PATH, model_name, 'config.json'))

    # Input/output size calculation is hacky
    if config['model'] == 'depthpose':
        martinez_conf = default_config()
        martinez_conf.update_values(config['pose_net'])
        _, m = martinez_net(martinez_conf, 56, 17 * 3)
    elif config['model'] == 'depthpose_comb':
        m = CombinedModel(56, 17 * 3, 14, config['pose_net'],
                          config['weak_decoder'])
    elif config['model'] == 'depthpose_offset':
        # NotImplementedError (a subclass of Exception) keeps this branch
        # consistent with the unknown-model branch below
        raise NotImplementedError(
            "depthpose_offset only implemented in depthpos.ipynb")
    else:
        raise NotImplementedError("Unknown model: " + config['model'])

    m.cuda()
    m.load_state_dict(
        torch.load(os.path.join(LOG_PATH, model_name, 'model_params.pkl')))
    m.eval()

    # for combined models the caller only needs the pose branch
    if config['model'] in ('depthpose_comb', 'depthpose_offset'):
        m = m.pose_net
    return config, m
def run_tpn(model_name, img_folder, hrnet_keypoint_file, pose_refine,
            focal_length, cx, cy):
    """
    Runs the temporal pose network on a folder of video frames.

    :param model_name: name of the trained model folder under ``LOG_PATH``
    :param img_folder: folder containing the video frames
    :param hrnet_keypoint_file: pickle with HR-Net 2D keypoint detections
    :param pose_refine: if True, runs the energy-based pose refinement step
    :param focal_length: focal length used for both fx and fy
    :param cx, cy: principal point coordinates
    :return: predicted 3D poses for the single 'vid' sequence
    """
    config, m = load_model(model_name)
    dataset = VideoTemporalDataset(img_folder, hrnet_keypoint_file,
                                   focal_length, focal_length, cx, cy)

    # restore the exact preprocessing pipeline used at training time
    params_path = os.path.join(LOG_PATH, str(model_name),
                               'preprocess_params.pkl')
    transform = SaveableCompose.from_file(params_path, dataset, globals())
    dataset.transform = transform

    assert isinstance(transform.transforms[1].normalizer, MeanNormalize3D)
    normalizer3d = transform.transforms[1].normalizer
    post_process_func = get_postprocessor(config, dataset, normalizer3d)

    logger = TemporalTestEvaluator(m, dataset, config['model']['loss'], True,
                                   post_process3d=post_process_func)
    logger.eval(calculate_scale_free=False, verbose=False)
    poses = logger.preds['vid']

    if pose_refine:
        print("Refining poses...")
        refine_config = load('../models/pose_refine_config.json')
        poses = optimize_poses(poses, dataset, refine_config)
    return poses
def predict_imgs(model, img_folder, bbox_folder, output_file, normalize,
                 detection_thresh):
    """Runs HR-Net 2D pose prediction over a folder of images.

    :param model: the HR-Net model to run
    :param img_folder: folder with input images
    :param bbox_folder: folder of per-image bounding-box files; each file
                        holds an (N, 5) array of detections
    :param output_file: where the rescored keypoint results are saved
    :param normalize: passed through to ``ImgFolderDataset``
    :param detection_thresh: minimum detection score to keep a bbox
    """
    detections = {}
    # sorted() gives a deterministic order across filesystems, matching the
    # sibling predict_imgs implementation in this codebase
    for file in sorted(os.listdir(bbox_folder)):
        dets = load(os.path.join(bbox_folder, file))
        assert dets.shape[1] == 5
        # splitext is robust to extensions of any length (the original
        # file[:-4] assumed a 3-character extension)
        img_name = os.path.splitext(file)[0]
        detections[img_name] = dets

    valid_dataset = hrnet_dataset.ImgFolderDataset(cfg, img_folder, detections,
                                                   normalize, detection_thresh)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS),
        shuffle=False,
        pin_memory=True)

    start = time.time()
    preds, boxes, image_names, orig_boxes = predict(cfg, valid_loader,
                                                    valid_dataset, model)
    end = time.time()
    print("Time in prediction: " + str(end - start))

    ensuredir(os.path.dirname(output_file))
    valid_dataset.rescore_and_save_result(output_file, preds, boxes,
                                          image_names, orig_boxes)
def load_2d_predictions(sequence, detector):
    """Loads precreated 2D pose predictions. These are matched with GT poses."""
    assert detector == 'hrnet'
    seq_name = _decode_sequence(sequence)
    pkl_path = os.path.join(MUPO_TS_PATH, "hrnet_pose2d", seq_name + '.pkl')
    return load(pkl_path, pkl_py2_comp=True)
def load_hrnet(cam, vid, v="v1"):
    """Loads GT-matched HR-Net keypoints for the given camera and video.

    :param cam: camera index
    :param vid: video index
    :param v: dataset version selector, forwarded to ``__get_path``
    """
    pkl_path = os.path.join(__get_path(v), "hrnet_keypoints",
                            "cam_%d" % cam, "gt_match_posedist_80",
                            "vid_%d.pkl" % vid)
    return load(pkl_path)
def generate_vid_frames(cam, vid_id):
    """
    Composites synthetic multi-person frames for one MuCo-Temp video:
    several single-person MPI-INF-3DHP recordings are layered onto each
    other back-to-front, using the dataset's green-screen masks, then
    written out as JPEGs.

    :param cam: camera index
    :param vid_id: video index within ``sequence_metas[cam]``
    """
    print(cam, vid_id)
    metas = sequence_metas[cam][vid_id]
    # 50 fps source sequences are stepped by 2 so every layer advances
    # at the same effective frame rate
    steps = [
        2 if mpii_3dhp.get_train_fps(meta[0], meta[1]) == 50 else 1
        for meta in metas
    ]
    out_folder = os.path.join(muco_temp.MUCO_TEMP_PATH,
                              'frames/cam_%d/vid_%d' % (cam, vid_id))
    ensuredir(out_folder)

    gt_poses = load(os.path.join(muco_temp.MUCO_TEMP_PATH,
                                 'frames/cam_%d/gt.pkl' % cam))[vid_id]['annot3']
    hip_ind = MuPoTSJoints().index_of('hip')

    for i in range(NUM_FRAMES):
        # generate frame
        depths = gt_poses[i, :, hip_ind, 2]
        ordered_poses = np.argsort(
            depths)[::-1]  # poses ordered by depth in decreasing order

        # the farthest person's full frame is the background layer
        bg_ind = ordered_poses[0]
        img = mpii_3dhp.get_image(metas[bg_ind][0], metas[bg_ind][1], cam,
                                  metas[bg_ind][2] + i * steps[bg_ind],
                                  rgb=False)
        img = img.astype('float32')

        # add new pose onto image, nearer people pasted over farther ones
        for pose_ind in ordered_poses[1:]:
            sub, seq, start = metas[pose_ind]
            pose_img = mpii_3dhp.get_image(sub, seq, cam,
                                           start + i * steps[pose_ind],
                                           rgb=False)

            # mask is 0 at greenscreen bg, 1 at foreground (body, chair)
            mask = mpii_3dhp.get_mask(sub, seq, cam,
                                      start + i * steps[pose_ind],
                                      'FGmasks')[:, :, 2] / 255.
            # soften the mask edge to avoid hard compositing seams
            mask = cv2.GaussianBlur(mask, (0, 0), 2)[:, :, np.newaxis]
            # chair_mask is 0 at chair, 1 everywhere else
            chair_mask = mpii_3dhp.get_mask(sub, seq, cam,
                                            start + i * steps[pose_ind],
                                            'ChairMasks')[:, :, [2]] / 255

            # first paste the chair (behind the person), then the person
            img = chair_mask * img + (1 - chair_mask) * pose_img
            img = mask * pose_img + (1 - mask) * img

        img = img.astype('uint8')
        cv2.imwrite(os.path.join(out_folder, 'img_%04d.jpg' % i), img,
                    [cv2.IMWRITE_JPEG_QUALITY, 80])
def eval(cross_camera, model_folder):
    """
    Evaluates the given model.

    :param cross_camera: if True, uses Protocol #3 (cross-camera setup) otherwise protocol 1
    :param model_folder: the folder that contains the ``model.h5`` and ``normalisation.pkl`` files.
    """
    data_type_2d = '2dshft'  # Type of 2D input: 2dgt-ground truth, 2dsh-Stacked Hourglass estimation, 2dshft-Finetuned Stacked Hourglass
    frame_density = 5 if cross_camera else 1  # Sampling of video frames

    data_if = H36M(CAMERAS_FPATH, DATABASE_FOLDER, frame_density=frame_density)
    # Protocol #3 holds out one camera; protocol 1 evaluates on all four
    if cross_camera:
        test_camera_names = ['60457274']
    else:
        test_camera_names = ['55011271', '58860488', '54138969', '60457274']

    # training-time normalisation statistics, reused for the test data
    norm = load(os.path.join(model_folder, 'normalisation.pkl'))

    # Actionwise split of test set for error metric calculation
    test_dict_actionwise = {}
    for action_name in data_if.all_action_names:
        action_dict_2d = data_if.get_data_dict(data_type_2d,
                                               action_names=[action_name],
                                               camera_names=test_camera_names,
                                               subject_ids=TEST_SUBJECT_IDS)
        action_dict_3d = data_if.get_data_dict('3dgt',
                                               action_names=[action_name],
                                               camera_names=test_camera_names,
                                               subject_ids=TEST_SUBJECT_IDS)
        apply_to_pose_dict(action_dict_2d, normalize_pose_arr,
                           norm['mean_2d'], norm['std_2d'])
        apply_to_pose_dict(action_dict_3d, normalize_pose_arr,
                           norm['mean_3d'], norm['std_3d'])

        action2d, action3d = create_dataset(action_dict_2d, action_dict_3d,
                                            data_if.all_cam_names)
        # flatten joints into a single feature vector per frame
        action2d = action2d.reshape((action2d.shape[0], -1))
        action3d = action3d.reshape((action3d.shape[0], -1))
        test_dict_actionwise[action_name] = {'x': action2d, 'y': action3d}

    m = load_model(os.path.join(model_folder, 'model.h5'),
                   custom_objects={'initializer_he': HeInitializerClass})
    m = cut_main_branch(m)

    evaluator = LogAllMillimeterError(norm['std_3d'], test_dict_actionwise)
    evaluator.model = m
    # triggers the metric computation once, as if an epoch had ended
    evaluator.on_epoch_end(None)
def train_ground_truth(sub, seq, fix_incorrect=True):
    """
    Returns the ground truth annotations. Returns a dict with fields 'annot2', 'annot3', 'univ_annot3'

    :param fix_incorrect: S4/Seq2 has annotations flipped on some frames, if True they are flipped back
    :return: dict of per-camera lists of float32 arrays, one entry per field
    """
    annot = load(
        os.path.join(MPII_3DHP_PATH, "S%d" % sub, "Seq%d" % seq, "annot.mat"))

    # keep only the MuPoTS-relevant subset of the 28 annotated joints
    annot2 = list([
        x[0].reshape((-1, 28, 2))[:, MUPOTS_RELEVANT_JOINTS].astype("float32")
        for x in annot["annot2"]
    ])
    annot3 = list([
        x[0].reshape((-1, 28, 3))[:, MUPOTS_RELEVANT_JOINTS].astype("float32")
        for x in annot["annot3"]
    ])
    univ_annot3 = list([
        x[0].reshape((-1, 28, 3))[:, MUPOTS_RELEVANT_JOINTS].astype("float32")
        for x in annot["univ_annot3"]
    ])

    # sanity checks: all 14 cameras present, frames numbered 0..N-1
    assert np.all(annot["cameras"][0] == np.arange(14))
    assert np.all(annot["frames"][:, 0] == np.arange(len(annot2[0])))

    # S3/Seq1 has one extra annotation but one less frame
    # Remove the very last annotation from everywhere
    if sub == 3 and seq == 1:
        for cam in range(14):
            annot2[cam] = annot2[cam][:-1]
            annot3[cam] = annot3[cam][:-1]
            univ_annot3[cam] = univ_annot3[cam][:-1]

    if sub == 4 and seq == 2 and fix_incorrect:
        # between 3759(in) and 5853(ex) annotations are flipped
        for cam in range(14):
            annot2[cam][3759:5853] = MuPoTSJoints().flip(
                annot2[cam][3759:5853])
            annot3[cam][3759:5853] = MuPoTSJoints().flip(
                annot3[cam][3759:5853])
            univ_annot3[cam][3759:5853] = MuPoTSJoints().flip(
                univ_annot3[cam][3759:5853])

    # every camera must end up with the same number of frames
    N = len(annot2[0])
    for cam in range(14):
        assert len(annot2[cam]) == N
        assert len(annot3[cam]) == N
        assert len(univ_annot3[cam]) == N

    result = {"annot2": annot2, "annot3": annot3, "univ_annot3": univ_annot3}
    return result
def main(model_name, pose_refine):
    """
    Evaluates a temporal model on MuPoTS, optionally with energy-based pose
    refinement, and prints relative/absolute PCK and AUC metrics.

    :param model_name: name of the trained model folder under ``LOG_PATH``
    :param pose_refine: if True, refine predictions with ``optimize_poses``
    """
    config, m = load_model(model_name)
    test_set = get_dataset(config)

    # restore the training-time preprocessing pipeline
    params_path = os.path.join(LOG_PATH, str(model_name),
                               'preprocess_params.pkl')
    transform = SaveableCompose.from_file(params_path, test_set, globals())
    test_set.transform = transform

    assert isinstance(transform.transforms[1].normalizer, MeanNormalize3D)
    normalizer3d = transform.transforms[1].normalizer
    post_process_func = get_postprocessor(config, test_set, normalizer3d)

    logger = TemporalMupotsEvaluator(m, test_set, config['model']['loss'],
                                     True, post_process3d=post_process_func)
    logger.eval(calculate_scale_free=not pose_refine, verbose=not pose_refine)

    if pose_refine:
        refine_config = load('../models/pose_refine_config.json')
        # stack the per-sequence predictions (MuPoTS sequences 1..20)
        pred = np.concatenate([logger.preds[i] for i in range(1, 21)])
        pred = optimize_poses(pred, test_set, refine_config)

        l = StackedArrayAllMupotsEvaluator(pred, test_set, True)
        l.eval(calculate_scale_free=True, verbose=True)

        # split the stacked array back into per-sequence predictions
        pred_by_seq = {}
        for seq in range(1, 21):
            inds = test_set.index.seq_num == seq
            pred_by_seq[seq] = pred[inds]
        pred_2d, pred_3d = unstack_mupots_poses(test_set, pred_by_seq)
    else:
        pred_2d, pred_3d = unstack_mupots_poses(test_set, logger.preds)

    print("\nR-PCK R-AUC A-PCK A-AUC")
    for relative in [True, False]:
        pcks, aucs = mupots_3d.eval_poses(
            False, relative,
            'annot3' if config['pose3d_scaling'] == 'normal'
            else 'univ_annot3',
            pred_2d, pred_3d, keep_matching=True)
        pck = np.mean(list(pcks.values()))
        auc = np.mean(list(aucs.values()))
        print(" %4.1f %4.1f " % (pck, auc), end='')
    print()
def _load_cameras(self, augmented_cameras_path=None):
    """
    Loads camera parameters for each subject with each camera.

    Sets members:
        self.cam_dict: dictionary of 4 tuples per subject ID containing its
                       camera parameters for the 4 h36m cams
        self.all_cam_names: camera names in camera-id order

    :param augmented_cameras_path: optional pickle with extra cameras that
                                   are merged into ``cam_dict``
    """
    self.cam_dict = cameras.load_cameras(self.cameras_fpath,
                                         self.all_subject_ids)
    if augmented_cameras_path is not None:
        aug_cams = load(augmented_cameras_path)
        self.cam_dict.update(aug_cams)

    self.all_cam_names = []
    max_id = max([x[1] for x in self.cam_dict.keys()])  # largest camera id
    # NOTE(review): camera names are read from subject 5's entries (index 6
    # of the parameter tuple); this assumes camera ids are contiguous in
    # 1..max_id and that subject 5 has every camera — verify for augmented
    # camera sets
    for c in range(1, max_id + 1):
        self.all_cam_names.append(self.cam_dict[(5, c)][6])
def load_model(model_folder):
    """
    Loads a trained ``TemporalModel`` and its config from
    ``LOG_PATH/<model_folder>``.

    :param model_folder: folder containing ``config.json`` and
                         ``model_params.pkl``
    :return: ``(config, model)`` with the model on GPU in eval mode
    """
    config = load(os.path.join(LOG_PATH, model_folder, 'config.json'))
    path = os.path.join(LOG_PATH, model_folder, 'model_params.pkl')

    # Input/output size calculation is hacky: the input width is recovered
    # from the first conv layer's weight shape instead of the config
    weights = torch.load(path)
    num_in_features = weights['expand_conv.weight'].shape[1]

    m = TemporalModel(num_in_features, MuPoTSJoints.NUM_JOINTS,
                      config['model']['filter_widths'],
                      dropout=config['model']['dropout'],
                      channels=config['model']['channels'])

    m.cuda()
    m.load_state_dict(weights)
    m.eval()
    return config, m
def __init__(self, frame_folder, hrnet_keypoint_file, fx, fy, cx=None,
             cy=None):
    """
    Dataset over the frames of a single video with HR-Net 2D detections.

    :param frame_folder: folder with the video frames (read in sorted order)
    :param hrnet_keypoint_file: pickle of HR-Net detections for those frames
    :param fx, fy: focal lengths, replicated for every frame
    :param cx, cy: principal point; defaults to the image centre of the
                   first frame when None
    """
    self.transform = None
    self.pose2d_jointset = CocoExJoints()
    self.pose3d_jointset = MuPoTSJoints()

    frame_list = sorted(os.listdir(frame_folder))
    N = len(frame_list)

    hrnet_detections = load(hrnet_keypoint_file)
    self.poses2d, self.valid_2d_pred = stack_hrnet_raw(
        frame_list, hrnet_detections)
    assert len(self.poses2d) == N, "unexpected number of frames"

    # single pseudo-sequence named 'vid', one record per frame
    index = [('vid', i) for i in range(N)]
    self.index = np.rec.array(index,
                              dtype=[('seq', 'U4'), ('frame', 'int32')])

    self.poses3d = np.ones(
        (N, self.pose3d_jointset.NUM_JOINTS, 3))  # dummy values

    # load first frame to get width/height
    frame = cv2.imread(os.path.join(frame_folder, frame_list[0]))
    self.width = frame.shape[1]

    self.fx = np.full(N, fx, dtype='float32')
    self.fy = np.full(N, fy, dtype='float32')
    # principal point falls back to the image centre when not provided
    self.cx = np.full(N, cx if cx is not None else frame.shape[1] / 2,
                      dtype='float32')
    self.cy = np.full(N, cy if cy is not None else frame.shape[0] / 2,
                      dtype='float32')

    assert self.poses2d.shape[1] == self.pose2d_jointset.NUM_JOINTS
def load_model(model_folder):
    """
    Loads a trained ``TemporalModel`` from ``LOG_PATH/<model_folder>`` for
    inference (dropout forced to 0).

    :param model_folder: folder containing ``config.json`` and
                         ``model_params.pkl``
    :return: ``(config, model)`` with the model on GPU in eval mode
    """
    config = load(os.path.join(LOG_PATH, model_folder, "config.json"))
    path = os.path.join(LOG_PATH, model_folder, "model_params.pkl")

    # Input/output size calculation is hacky: the input width is recovered
    # from the first conv layer's weight shape instead of the config
    weights = torch.load(path)
    num_in_features = weights["expand_conv.weight"].shape[1]

    m = TemporalModel(
        num_in_features,
        MuPoTSJoints.NUM_JOINTS,
        config["model"]["filter_widths"],
        dropout=0,  # inference only — disable dropout regardless of config
        channels=config["model"]["channels"],
        layernorm=config["model"]["layernorm"],
    )

    m.cuda()
    m.load_state_dict(weights)
    m.eval()
    return config, m
def predict_imgs(model, img_folder, bbox_folder, output_file, normalize,
                 detection_thresh):
    """Runs 2D pose prediction over a folder of images and saves the
    rescored keypoint results to ``output_file``.

    :param bbox_folder: per-image detection files, each an (N, 5) array
    """
    detections = {}
    for fname in sorted(os.listdir(bbox_folder)):
        det_arr = load(os.path.join(bbox_folder, fname))
        assert det_arr.shape[1] == 5
        key = fname[:-4]  # remove extension
        detections[key] = det_arr

    valid_dataset = hrnet_dataset.ImgFolderDataset(cfg, img_folder,
                                                   detections, normalize,
                                                   detection_thresh)

    start = time.time()
    preds, boxes, image_names, orig_boxes = predict_dataset(
        cfg, valid_dataset, model)
    end = time.time()
    print("Time in prediction: " + str(end - start))

    ensuredir(os.path.dirname(output_file))
    valid_dataset.rescore_and_save_result(output_file, preds, boxes,
                                          image_names, orig_boxes)
def load_raw_gt_annotations(sequence):
    """Loads the GT annotations from the MuPo-TS ``annot.mat`` file."""
    seq_name = _decode_sequence(sequence)
    mat_path = os.path.join(MUPO_TS_PATH, "MultiPersonTestSet", seq_name,
                            'annot.mat')
    return load(mat_path)['annotations']
def test_poses_hrnet(seq):
    """Loads HR-Net 2D pose predictions for MPI-INF-3DHP test sequence ``seq``."""
    seq_folder = "TS%d" % seq
    return load(os.path.join(MPII_3DHP_PATH, "mpi_inf_3dhp_test_set",
                             seq_folder, "hrnet.pkl"))
def load_jointwise_depth(sequence):
    """Loads the precomputed jointwise depth file for sequence ``sequence``."""
    depth_file = 'TS%02d.npy' % sequence
    return load(os.path.join('data', 'jointwise_depth', depth_file))
def from_file(path, dataset, locals):
    """
    Loads a saved transform state from ``path`` and rebuilds a
    ``SaveableCompose`` from it.

    :param path: path to the pickled state
    :param dataset: dataset the rebuilt transforms are bound to
    :param locals: namespace used to resolve transform class names
    """
    # NOTE(review): the parameter name ``locals`` shadows the builtin;
    # renaming it would break keyword callers, so it is left as-is
    state = load(path)
    return SaveableCompose.from_state(state, dataset, locals)
def from_file(cls, path, dataset):
    """Rebuilds an instance from the state pickled at ``path``."""
    return cls.from_state(load(path), dataset)
def from_file(cls, path):
    """Rebuilds an instance from the state pickled at ``path``."""
    return cls.from_state(load(path))
def main():
    """
    Voice-conversion driver (TF1 graph mode): loads a trained encoder/decoder,
    converts the source speaker's features to the target speaker, writes the
    latent codes and converted features to disk, and optionally launches MCD
    computation and waveform synthesis as subprocesses.

    NOTE(review): this body was reconstructed from a whitespace-collapsed
    source; statement grouping inside ``with`` blocks is a best-effort guess.
    """
    parser = argparse.ArgumentParser(description="Conversion.")
    parser.add_argument("--logdir", required=True, type=str,
                        help="path of log directory")
    parser.add_argument("--checkpoint", default=None, type=str,
                        help="path of checkpoint")
    parser.add_argument("--src", default=None, required=True, type=str,
                        help="source speaker")
    parser.add_argument("--trg", default=None, required=True, type=str,
                        help="target speaker")
    parser.add_argument("--type", default='test', type=str,
                        help="test or valid (default is test)")
    parser.add_argument("--input_feat", required=True, type=str,
                        help="input feature type")
    parser.add_argument("--output_feat", required=True, type=str,
                        help="output feature type")
    parser.add_argument("--mcd", action='store_true',
                        help="calculate mcd or not")
    parser.add_argument("--syn", action='store_true',
                        help="synthesize voice or not")
    args = parser.parse_args()

    # make exp directory
    output_dir = get_default_logdir_output(args)
    tf.gfile.MakeDirs(output_dir)

    # set log level
    fmt = '%(asctime)s %(message)s'
    datefmt = '%m/%d/%Y %I:%M:%S'
    logFormatter = logging.Formatter(fmt, datefmt=datefmt)
    logging.basicConfig(
        level=logging.INFO,
        filename=os.path.join(output_dir, 'exp.log'),
        format=fmt,
        datefmt=datefmt,
    )
    consoleHandler = logging.StreamHandler()
    consoleHandler.setFormatter(logFormatter)
    logging.getLogger().addHandler(consoleHandler)
    logging.info('====================')
    logging.info('Conversion start')
    logging.info(args)

    # Load architecture
    arch = tf.gfile.Glob(os.path.join(
        args.logdir, 'architecture*.json'))[0]  # should only be 1 file
    with open(arch) as fp:
        arch = json.load(fp)

    # Load the model module
    module = import_module(arch['model_module'], package=None)
    MODEL = getattr(module, arch['model'])

    input_feat = args.input_feat
    input_feat_dim = arch['feat_param']['dim'][input_feat]
    output_feat = args.output_feat

    # read speakers
    spk_list = read_txt(arch['spklist'])

    # Load statistics, normalize and NCHW
    normalizers = {}
    for k in arch['normalizer']:
        normalizers[k] = {}
        for norm_type in arch['normalizer'][k]['type']:
            if norm_type == 'minmax':
                normalizer = MinMaxScaler(
                    xmax=read_hdf5(arch['stats'], '/max/' + k),
                    xmin=read_hdf5(arch['stats'], '/min/' + k),
                )
            elif norm_type == 'meanvar':
                normalizer = StandardScaler(
                    mu=read_hdf5(arch['stats'], '/mean/' + k),
                    std=read_hdf5(arch['stats'], '/scale/' + k),
                )
            normalizers[k][norm_type] = normalizer

    # Define placeholders
    x_pl = tf.placeholder(tf.float32, [None, input_feat_dim])
    yh_pl = tf.placeholder(dtype=tf.int64, shape=[1, ])
    # broadcast the single target-speaker id over every input frame
    yh = yh_pl * tf.ones(shape=[tf.shape(x_pl)[0], ], dtype=tf.int64)
    yh = tf.expand_dims(yh, 0)

    # Define model
    model = MODEL(arch, normalizers)
    z, _ = model.encode(x_pl, input_feat)
    xh = model.decode(z, yh, output_feat)

    # make directories for output
    tf.gfile.MakeDirs(os.path.join(output_dir, 'latent'))
    tf.gfile.MakeDirs(
        os.path.join(output_dir, 'converted-{}'.format(output_feat)))

    # Define session
    with tf.Session() as sess:
        # define saver
        saver = tf.train.Saver()

        # load checkpoint
        if args.checkpoint is None:
            load(saver, sess, args.logdir, )
        else:
            _, ckpt = os.path.split(args.checkpoint)
            load(saver, sess, args.logdir, ckpt=ckpt)

        # get feature list, either validation set or test set
        if args.type == 'test':
            files = tf.gfile.Glob(
                arch['conversion']['test_file_pattern'].format(args.src))
        elif args.type == 'valid':
            files = []
            for p in arch['training']['valid_file_pattern']:
                files.extend(tf.gfile.Glob(p.replace('*', args.src)))
        files = sorted(files)

        # conversion
        for f in files:
            basename = os.path.split(f)[-1]
            path_to_latent = os.path.join(
                output_dir, 'latent',
                '{}-{}-{}'.format(args.src, args.trg, basename))
            path_to_cvt = os.path.join(
                output_dir, 'converted-{}'.format(output_feat),
                '{}-{}-{}'.format(args.src, args.trg, basename))
            logging.info(basename)

            # load source features
            src_data = Whole_feature_reader(f, arch['feat_param'])

            # run encoder + decoder in one pass
            latent, cvt = sess.run(
                [z, xh],
                feed_dict={
                    yh_pl: np.asarray([spk_list.index(args.trg)]),
                    x_pl: src_data[input_feat]
                })

            # save bin
            with open(path_to_latent, 'wb') as fp:
                fp.write(latent.tostring())
            with open(path_to_cvt, 'wb') as fp:
                fp.write(cvt.tostring())

    # optionally calculate MCD
    if args.mcd:
        cmd = "python ./mcd_calculate.py" + \
              " --type " + args.type + \
              " --logdir " + output_dir + \
              " --input_feat " + input_feat + \
              " --output_feat " + output_feat
        print(cmd)
        os.system(cmd)

    # optionally synthesize waveform
    if args.syn:
        cmd = "python ./synthesize.py" + \
              " --type " + args.type + \
              " --logdir " + output_dir + \
              " --input_feat " + input_feat + \
              " --output_feat " + output_feat
        print(cmd)
        os.system(cmd)
def load_raw_gt_occlusions(sequence):
    """Loads occlusion labels from the MuPo-TS ``occlusion.mat`` file."""
    seq_name = _decode_sequence(sequence)
    occl_path = os.path.join(MUPO_TS_PATH, "MultiPersonTestSet", seq_name,
                             'occlusion.mat')
    return load(occl_path)['occlusion_labels']
def restore(self):
    """Restores the session from an explicit checkpoint if one was given,
    otherwise from the latest checkpoint in the log directory."""
    common = (self.saver, self.sess, self.dirs)
    if self.ckpt:
        load(*common, self.ckpt)
    elif self.args.logdir:
        load(*common)
def main(model_name, pose_refine, exp: Experiment):
    """
    Evaluates a temporal model, optionally with pose refinement, logging
    metrics to a comet-style ``Experiment`` and printing PCK/AUC scores.

    NOTE(review): this body was reconstructed from a whitespace-collapsed
    source; the placement of the second ``exp.log_metrics`` call inside the
    ``else`` branch is a best-effort guess.

    :param model_name: name of the trained model folder under ``LOG_PATH``
    :param pose_refine: if True, refine predictions with ``optimize_poses``
    :param exp: experiment tracker receiving the evaluation metrics
    """
    config, m = load_model(model_name)
    test_set = get_dataset(config)

    # restore the training-time preprocessing pipeline
    params_path = os.path.join(LOG_PATH, str(model_name),
                               "preprocess_params.pkl")
    transform = SaveableCompose.from_file(params_path, test_set, globals())
    test_set.transform = transform

    assert isinstance(transform.transforms[1].normalizer, MeanNormalize3D)
    normalizer3d = transform.transforms[1].normalizer
    post_process_func = get_postprocessor(config, test_set, normalizer3d)

    prefix = "R" if pose_refine else "NR"
    prefix = f"mupo_{prefix}"

    # NOTE(review): a commented-out TemporalMupotsEvaluator variant
    # (prefix "mupo_NR") was removed here; TemporalTestEvaluator with the
    # "mpi_NR" prefix is the active configuration
    logger = TemporalTestEvaluator(
        m,
        test_set,
        config["model"]["loss"],
        True,
        post_process3d=post_process_func,
        prefix="mpi_NR",
        orient_norm=None  # config["orient_norm"]
    )
    logger.eval(calculate_scale_free=not pose_refine, verbose=not pose_refine)
    exp.log_metrics(logger.losses_to_log)

    if pose_refine:
        refine_config = load("../models/pose_refine_config.json")
        # stack per-sequence predictions (MuPoTS sequences 1..20)
        pred = np.concatenate([logger.preds[i] for i in range(1, 21)])
        pred = optimize_poses(pred, test_set, refine_config)

        l = StackedArrayAllMupotsEvaluator(pred, test_set, True,
                                           prefix=prefix)
        l.eval(calculate_scale_free=True, verbose=True)
        exp.log_metrics(l.losses_to_log)

        # split the stacked array back into per-sequence predictions
        pred_by_seq = {}
        for seq in range(1, 21):
            inds = test_set.index.seq_num == seq
            pred_by_seq[seq] = pred[inds]
        pred_2d, pred_3d = unstack_mupots_poses(test_set, pred_by_seq)
    else:
        pred_2d, pred_3d = unstack_mupots_poses(test_set, logger.preds)
        exp.log_metrics(logger.losses_to_log)

    print("\nR-PCK R-AUC A-PCK A-AUC")
    keys = ["R-PCK", "R-AUC", "A-PCK", "A-AUC"]
    values = []
    for relative in [True, False]:
        pcks, aucs = mupots_3d.eval_poses(
            False,
            relative,
            "annot3" if config["pose3d_scaling"] == "normal"
            else "univ_annot3",
            pred_2d,
            pred_3d,
            keep_matching=True,
        )
        pck = np.mean(list(pcks.values()))
        auc = np.mean(list(aucs.values()))
        values.append(pck)
        values.append(auc)
        print(" %4.1f %4.1f " % (pck, auc), end="")
    print()
    exp.log_metrics({f"{prefix}-{k}": v for k, v in zip(keys, values)})
def train_poses_hrnet(sub, seq, cam):
    """Loads HR-Net 2D predictions for a training subject/sequence/camera."""
    seq_folder = os.path.join(MPII_3DHP_PATH, 'S%d' % sub, 'Seq%d' % seq)
    return load(os.path.join(seq_folder, 'hrnet', 'hrnet_%02d.pkl' % cam))
def train_poses_hrnet(sub, seq, cam):
    """Loads HR-Net 2D predictions for a training subject/sequence/camera."""
    pkl_path = os.path.join(MPII_3DHP_PATH, "S%d" % sub, "Seq%d" % seq,
                            "hrnet", "hrnet_%02d.pkl" % cam)
    return load(pkl_path)
def test_poses_hrnet(seq):
    """Loads HR-Net 2D predictions for test sequence ``seq``."""
    pkl_path = os.path.join(MPII_3DHP_PATH, 'mpi_inf_3dhp_test_set',
                            'TS%d' % seq, 'hrnet.pkl')
    return load(pkl_path)
def run(**kwargs):
    """
    Per-sequence test-time refinement of a temporal pose model: for each
    batch (sequence), a copy of the model is optimized against a composite
    loss (pose-consistency + hip/relative smoothness + optional bone-length
    terms), then the refined predictions are evaluated on MuPoTS.

    Any keyword argument overrides the corresponding key of
    ``scripts/nn_refine_config.json``.

    NOTE(review): this body was reconstructed from a whitespace-collapsed
    source; the indentation of the ``kp_score`` setup (inside the reinit
    branch) and of the post-iteration save block is a best-effort guess.
    """
    refine_config = load("scripts/nn_refine_config.json")
    for k, v in kwargs.items():
        refine_config[k] = v

    exp = Experiment(
        workspace="pose-refinement",
        project_name="08-nn-ref-bone-length",
        display_summary_level=0,
    )
    exp.log_parameters(refine_config)

    model_name = refine_config["model_name"]
    config, model = load_model(model_name)
    test_set = get_dataset(config)
    post_process_func = extract_post(model_name, test_set, config)

    joint_set = MuPoTSJoints()
    connected_joints = joint_set.LIMBGRAPH

    pad = (model.receptive_field() - 1) // 2
    generator = UnchunkedGeneratorWithGT(test_set, pad, True)
    seqs = sorted(np.unique(test_set.index.seq))

    optimized_preds_list = defaultdict(list)
    max_batch = len(generator)
    exp.log_parameter("max_batch", max_batch)
    # one batch per sequence
    for curr_batch, (pose2d, valid, pose3d) in enumerate(generator):
        exp.log_parameter("curr_batch", curr_batch)
        exp.log_parameter("curr_batch%", curr_batch / max_batch)
        # full-batch mode refines the whole sequence at once; otherwise
        # a sliding window is refined per frame
        if refine_config["full_batch"]:
            max_item = 1
        else:
            max_item = valid.shape[-1]
        for curr_item in range(max_item):
            if not refine_config["full_batch"]:
                exp.log_parameter("curr_item", curr_item)
                exp.log_parameter("curr_item%", curr_item / max_item)
            # pick the [f, t) window around the current frame; near the end
            # of the sequence the window is taken backwards
            if (curr_item + 1) > (
                    max_item - refine_config["smoothness_loss_hip_largestep"]):
                reverse = True
                f = curr_item - refine_config["smoothness_loss_hip_largestep"]
                t = curr_item + 1
            else:
                reverse = False
                f = curr_item
                t = f + refine_config["smoothness_loss_hip_largestep"] + 1

            # refine a fresh copy so sequences don't contaminate each other
            model_ = copy.deepcopy(model)
            optimizer = get_optimizer(model_.parameters(), refine_config)
            max_iter = refine_config["num_iter"]
            for curr_iter in range(max_iter):
                exp.log_parameter("curr_iter", curr_iter)
                exp.log_parameter("curr_iter%", curr_iter / max_iter)
                optimizer.zero_grad()

                seq = seqs[curr_batch]
                if refine_config["full_batch"]:
                    nn_input = pose2d
                    valid_ = valid[0]
                else:
                    nn_input = pose2d[:, f: t + 2 * pad, :]
                    valid_ = valid[0][f:t]
                pred3d = model_(
                    torch.from_numpy(nn_input).cuda()
                )  # [2, 401, 42] -> [2, 21+2*13, 42], pred3d: [21, 16, 3]

                # average the normal and the flip-augmented predictions
                pred_real_pose = post_process_func(
                    pred3d[0], seq)  # unnormalized output
                pred_real_pose_aug = post_process_func(pred3d[1], seq)
                pred_real_pose_aug[:, :, 0] *= -1
                pred_real_pose_aug = test_set.pose3d_jointset.flip(
                    pred_real_pose_aug)
                pred_real_pose = (pred_real_pose + pred_real_pose_aug) / 2

                pred = pred_real_pose[valid_]
                gt_pose = post_process_func(pose3d[0], seq)
                inds = test_set.index.seq == seq

                # hip-relative poses in meters
                poses_pred = abs_to_hiprel(pred, joint_set) / 1000  # (201, 17, 3)

                # anchor the pose-consistency loss to the (re)initial poses
                if refine_config["reinit"] or (curr_iter == 0):
                    poses_init = poses_pred.detach().clone()
                    poses_init.requires_grad = False
                    # mean 2D keypoint confidence per frame, used to weight
                    # the losses
                    if not refine_config["full_batch"]:
                        kp_score = np.mean(
                            test_set.poses2d[inds, :, 2], axis=-1)[f:t]  # (201,)
                    else:
                        kp_score = np.mean(
                            test_set.poses2d[inds, :, 2], axis=-1)  # (201,)
                    # if refine_config['smooth_visibility']:
                    #     kp_score = ndimage.median_filter(kp_score, 9)
                    kp_score = torch.from_numpy(kp_score).cuda()  # [201]
                    scale = torch.ones(
                        (len(kp_score), 1, 1))  # torch.Size([201, 1, 1])
                    kp_score.requires_grad = False
                    scale.requires_grad = False

                # smoothing formulation
                if refine_config["pose_loss"] == "gm":
                    pose_loss = kp_score.view(-1, 1, 1) * gmloss(
                        poses_pred - poses_init, refine_config["gm_alpha"])
                elif refine_config["pose_loss"] == "capped_l2":
                    pose_loss = kp_score.view(-1, 1, 1) * capped_l2(
                        poses_pred - poses_init,
                        torch.tensor(refine_config["l2_cap"]).float().cuda(),
                    )
                elif refine_config["pose_loss"] == "capped_l2_euc_err":
                    pose_loss = kp_score.view(-1, 1) * capped_l2_euc_err(
                        poses_pred,
                        poses_init,
                        torch.tensor(refine_config["l2_cap"]).float().cuda(),
                    )
                else:
                    raise NotImplementedError(
                        "Unknown pose_loss" + refine_config["pose_loss"])

                # hip smoothness at step 1 and at a larger step, the latter
                # downweighted by keypoint confidence
                velocity_loss_hip = globals()[
                    refine_config["smoothness_loss_hip"]](
                        poses_pred[:, [0], :], 1)
                step = refine_config["smoothness_loss_hip_largestep"]
                vel_loss = globals()[refine_config["smoothness_loss_hip"]](
                    poses_pred[:, [0], :], step)
                velocity_loss_hip_large = (
                    1 - kp_score[-len(vel_loss):]) * vel_loss

                # same two-scale smoothness for the hip-relative joints
                velocity_loss_rel = globals()[
                    refine_config["smoothness_loss_rel"]](
                        poses_pred[:, 1:, :], 1)
                vel_loss = globals()[refine_config["smoothness_loss_rel"]](
                    poses_pred[:, 1:, :], step)
                velocity_loss_rel_large = (
                    1 - kp_score[-len(vel_loss):]) * vel_loss

                prefix = f"{curr_batch}_{curr_item}"
                if refine_config["full_batch"]:
                    total_loss = (
                        torch.sum(pose_loss)
                        + refine_config["smoothness_weight_hip"]
                        * torch.sum(velocity_loss_hip)
                        + refine_config["smoothness_weight_hip_large"]
                        * torch.sum(velocity_loss_hip_large)
                        + refine_config["smoothness_weight_rel"]
                        * torch.sum(velocity_loss_rel)
                        + refine_config["smoothness_weight_rel_large"]
                        * torch.sum(velocity_loss_rel_large)
                    )
                    m = {
                        f"{prefix}_total_loss": total_loss,
                        f"{prefix}_pose_loss": torch.sum(pose_loss),
                        f"{prefix}_velocity_loss_hip":
                            torch.sum(velocity_loss_hip),
                        f"{prefix}_velocity_loss_hip_large": torch.sum(
                            velocity_loss_hip_large),
                        f"{prefix}_velocity_loss_rel":
                            torch.sum(velocity_loss_rel),
                        f"{prefix}_velocity_loss_rel_large": torch.sum(
                            velocity_loss_rel_large),
                    }
                else:
                    # only the current frame's contribution is optimized;
                    # index from the front or back depending on window direction
                    neighbour_dist_idx = 0 if not reverse else -1
                    total_loss = (
                        torch.sum(pose_loss[neighbour_dist_idx, ])
                        + refine_config["smoothness_weight_hip"]
                        * velocity_loss_hip[[neighbour_dist_idx]]
                        + refine_config["smoothness_weight_hip_large"]
                        * velocity_loss_hip_large
                        + refine_config["smoothness_weight_rel"]
                        * velocity_loss_rel[[neighbour_dist_idx]]
                        + refine_config["smoothness_weight_rel_large"]
                        * velocity_loss_rel_large
                    )
                    m = {
                        f"{prefix}_total_loss": total_loss[0],
                        f"{prefix}_pose_loss": torch.sum(
                            pose_loss[neighbour_dist_idx, ]),
                        f"{prefix}_velocity_loss_hip": velocity_loss_hip[
                            neighbour_dist_idx],
                        f"{prefix}_velocity_loss_hip_large":
                            velocity_loss_hip_large[0],
                        f"{prefix}_velocity_loss_rel": velocity_loss_rel[
                            neighbour_dist_idx],
                        f"{prefix}_velocity_loss_rel_large":
                            velocity_loss_rel_large[0],
                    }

                # optional bone-length regularizers (full-batch mode only):
                # temporal consistency of bone lengths plus closeness to the
                # GT mean bone lengths
                if refine_config["bone_weight"] != 0:
                    assert refine_config["full_batch"]
                    err = get_bone_lengths(pred, connected_joints)
                    bone_err = (
                        torch.mean(torch.std(err, dim=0))
                        * refine_config["bone_weight"]
                    )  # [cs]
                    total_loss += bone_err
                    m["bone_err"] = bone_err

                    gt_bones = get_bone_lengths(
                        torch.from_numpy(gt_pose), connected_joints)
                    gt_bones = torch.mean(gt_bones, dim=0)
                    length_err = torch.nn.functional.mse_loss(
                        err, gt_bones.cuda()
                    ) * refine_config["bone_length_weight"]
                    total_loss += length_err
                    m["bone_length_err"] = length_err

                total_loss.backward()
                optimizer.step()
                # print(m)
                # m = {k: v.detach().cpu().numpy() for k, v in m.items()}
                # exp.log_metrics(m, step=curr_iter)

            # persist the refined poses of this sequence/window
            os.makedirs("nn_refs", exist_ok=True)
            np.save(f"nn_refs/{seq.replace('/', '_')}.npy",
                    pred.cpu().detach().numpy())

            # collect the refined hip-relative poses, converted back to mm
            # with the hip re-attached
            if refine_config["full_batch"]:
                optimized_preds_list[seq].append(
                    add_back_hip(poses_pred.detach().cpu().numpy() * 1000,
                                 joint_set))
            else:
                optimized_preds_list[seq].append(
                    add_back_hip(
                        poses_pred[[neighbour_dist_idx]].detach().cpu().numpy()
                        * 1000,
                        joint_set,
                    ))

    pred = {k: np.concatenate(v) for k, v in optimized_preds_list.items()}
    pred = TemporalMupotsEvaluator._group_by_seq(pred)
    pred = np.concatenate([pred[i] for i in range(1, 21)])

    l = StackedArrayAllMupotsEvaluator(pred, test_set, True, prefix="R")
    l.eval(calculate_scale_free=True, verbose=True)
    exp.log_metrics(l.losses_to_log)

    # split the stacked predictions back into per-sequence arrays
    pred_by_seq = {}
    for seq in range(1, 21):
        inds = test_set.index.seq_num == seq
        pred_by_seq[seq] = pred[inds]
    pred_2d, pred_3d = unstack_mupots_poses(test_set, pred_by_seq)

    print("\nR-PCK R-AUC A-PCK A-AUC")
    keys = ["R-PCK", "R-AUC", "A-PCK", "A-AUC"]
    values = []
    for relative in [True, False]:
        pcks, aucs = mupots_3d.eval_poses(
            False,
            relative,
            "annot3" if config["pose3d_scaling"] == "normal"
            else "univ_annot3",
            pred_2d,
            pred_3d,
            keep_matching=True,
        )
        pck = np.mean(list(pcks.values()))
        auc = np.mean(list(aucs.values()))
        values.append(pck)
        values.append(auc)
        print(" %4.1f %4.1f " % (pck, auc), end="")
    print()
    exp.log_metrics({curr_iter: v for curr_iter, v in zip(keys, values)})