def initialize():
    """Parse the flags, set up logging and apply the graph-mode TensorFlow defaults.

    Eager execution is disabled, the flags are parsed into ``FLAGS`` through
    ``options.initialize_with_logfiles`` and a short run header is logged.
    Unset flags (test batch size, checkpoint directory, prediction path, bone
    length dataset) get their fallback values, path-like flags are passed
    through ``util.ensure_absolute_path``, and the slim argument scopes for the
    data format and the pooling padding are entered.
    """
    tf.compat.v1.disable_eager_execution()
    options.initialize_with_logfiles(get_parser())

    # Run header: one log record per entry, in this order.
    header = [
        '-- Starting --',
        f'Host: {socket.gethostname()}',
        f'Process id (pid): {os.getpid()}',
    ]
    if FLAGS.comment:
        header.append(f'Comment: {FLAGS.comment}')
    header.append(f'Raw command: {" ".join(map(shlex.quote, sys.argv))}')
    header.append(f'Parsed flags: {FLAGS}')
    for message in header:
        logging.info(message)

    tfu.set_data_format(FLAGS.data_format)
    if FLAGS.dtype == 'float32':
        tfu.set_dtype(tf.float32)
    else:
        tfu.set_dtype(tf.float16)

    if FLAGS.batch_size_test is None:
        FLAGS.batch_size_test = FLAGS.batch_size

    # The checkpoint directory falls back to the log directory.
    checkpoint_dir = FLAGS.logdir if FLAGS.checkpoint_dir is None else FLAGS.checkpoint_dir
    FLAGS.checkpoint_dir = util.ensure_absolute_path(
        checkpoint_dir, root=f'{paths.DATA_ROOT}/experiments')
    os.makedirs(FLAGS.checkpoint_dir, exist_ok=True)

    # Predictions are resolved against the directory of the loaded checkpoint if one is given
    # (using the load path as it was passed in), otherwise against the checkpoint directory.
    pred_path = FLAGS.pred_path or f'predictions_{FLAGS.dataset}.npz'
    if FLAGS.load_path:
        pred_base_dir = os.path.dirname(FLAGS.load_path)
    else:
        pred_base_dir = FLAGS.checkpoint_dir
    FLAGS.pred_path = util.ensure_absolute_path(pred_path, pred_base_dir)

    if FLAGS.bone_length_dataset is None:
        FLAGS.bone_length_dataset = FLAGS.dataset

    if FLAGS.load_path:
        load_path = FLAGS.load_path
        # Accept the '.index' / '.meta' file of a checkpoint in place of its prefix.
        if load_path.endswith(('.index', '.meta')):
            load_path = os.path.splitext(load_path)[0]
        FLAGS.load_path = util.ensure_absolute_path(load_path, FLAGS.checkpoint_dir)

    # Override the default data format in slim layers
    layers_with_data_format = [
        slim.conv2d, slim.conv3d, slim.conv3d_transpose, slim.conv2d_transpose,
        slim.avg_pool2d, slim.separable_conv2d, slim.max_pool2d, slim.batch_norm,
        slim.spatial_softmax,
    ]
    enter_context(slim.arg_scope(layers_with_data_format, data_format=tfu.data_format()))

    # Override default paddings to SAME
    pooling_layers = [slim.avg_pool2d, slim.max_pool2d]
    enter_context(slim.arg_scope(pooling_layers, padding='SAME'))

    tf.compat.v2.random.set_seed(FLAGS.seed)
    if FLAGS.gui:
        plt.switch_backend('TkAgg')
def initialize(args=None):
    """Parse the flags, set up logging and apply the global TensorFlow settings.

    Args:
        args: forwarded unchanged to ``options.initialize_with_logfiles`` together
            with the parser returned by ``get_parser()``.

    After parsing, a short run header is logged, unset flags get their fallback
    values, path-like flags are passed through ``util.ensure_absolute_path``,
    the random seed is set, memory growth is enabled for every listed GPU and
    the mixed-precision policy is selected when ``FLAGS.dtype`` is ``'float16'``.
    """
    options.initialize_with_logfiles(get_parser(), args)

    # Run header: one log record per entry, in this order.
    header = [
        '-- Starting --',
        f'Host: {socket.gethostname()}',
        f'Process id (pid): {os.getpid()}',
    ]
    if FLAGS.comment:
        header.append(f'Comment: {FLAGS.comment}')
    header.append(f'Raw command: {" ".join(map(shlex.quote, sys.argv))}')
    header.append(f'Parsed flags: {FLAGS}')
    for message in header:
        logger.info(message)

    tfu.set_data_format(FLAGS.data_format)
    if FLAGS.dtype == 'float32':
        tfu.set_dtype(tf.float32)
    else:
        tfu.set_dtype(tf.float16)

    if FLAGS.batch_size_test is None:
        FLAGS.batch_size_test = FLAGS.batch_size

    # The checkpoint directory falls back to the log directory.
    checkpoint_dir = FLAGS.logdir if FLAGS.checkpoint_dir is None else FLAGS.checkpoint_dir
    FLAGS.checkpoint_dir = util.ensure_absolute_path(
        checkpoint_dir, root=f'{paths.DATA_ROOT}/experiments')
    os.makedirs(FLAGS.checkpoint_dir, exist_ok=True)

    # Predictions are resolved against the directory of the loaded checkpoint if one is given
    # (using the load path as it was passed in), otherwise against the checkpoint directory.
    pred_path = FLAGS.pred_path or f'predictions_{FLAGS.dataset}.npz'
    if FLAGS.load_path:
        pred_base_dir = os.path.dirname(FLAGS.load_path)
    else:
        pred_base_dir = FLAGS.checkpoint_dir
    FLAGS.pred_path = util.ensure_absolute_path(pred_path, pred_base_dir)

    # These flags all default to the main dataset name.
    for flag_name in ('bone_length_dataset', 'model_joints', 'output_joints'):
        if getattr(FLAGS, flag_name) is None:
            setattr(FLAGS, flag_name, FLAGS.dataset)

    if FLAGS.load_path:
        load_path = FLAGS.load_path
        # Accept the '.index' / '.meta' file of a checkpoint in place of its prefix.
        if load_path.endswith(('.index', '.meta')):
            load_path = os.path.splitext(load_path)[0]
        FLAGS.load_path = util.ensure_absolute_path(load_path, FLAGS.checkpoint_dir)

    tf.random.set_seed(FLAGS.seed)
    if FLAGS.viz:
        plt.switch_backend('TkAgg')

    # Backbone names are normalized to use dashes instead of underscores.
    FLAGS.backbone = FLAGS.backbone.replace('_', '-')

    gpu_devices = tf.config.experimental.list_physical_devices('GPU')
    for gpu_device in gpu_devices:
        tf.config.experimental.set_memory_growth(gpu_device, True)

    if FLAGS.dtype == 'float16':
        tf.keras.mixed_precision.set_global_policy('mixed_float16')
def main():
    """Evaluate stored predictions per activity and print the metrics as LaTeX rows.

    Command-line flags are parsed into ``FLAGS``. The activity of each image is
    taken from its relative path: the text between ``Images/`` and the first
    following dot, cut at the first space. With ``--seeds`` greater than 1 the
    mean and standard deviation over seeds are printed, otherwise the metrics
    of the single prediction file at ``FLAGS.pred_path``.

    Raises:
        TypeError: if an image path does not contain ``Images/<name>.`` (the
            regex search then returns ``None``, which cannot be indexed).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--pred-path', type=str, default=None)
    parser.add_argument('--procrustes', action=options.BoolAction)
    parser.add_argument('--only-S11', action=options.BoolAction)
    parser.add_argument('--seeds', type=int, default=1)
    # The root joint is the last if this is set, else the first
    parser.add_argument('--root-last', action=options.BoolAction)
    options.initialize(parser)
    FLAGS.pred_path = util.ensure_absolute_path(
        FLAGS.pred_path, f'{paths.DATA_ROOT}/experiments')

    all_image_relpaths, all_true3d = get_all_gt_poses()

    # Raw string: '\.' inside a normal (or f-) string is an invalid escape sequence, which
    # current Python versions warn about. The pattern is compiled once for all paths.
    activity_pattern = re.compile(r'Images/(.+?)\.')
    activities = np.array([
        activity_pattern.search(path)[1].split(' ')[0]
        for path in all_image_relpaths])

    if FLAGS.seeds > 1:
        mean_per_seed, std_per_seed = evaluate_multiple_seeds(all_true3d, activities)
        print(to_latex(mean_per_seed))
        print(to_latex(std_per_seed))
    else:
        metrics = evaluate(FLAGS.pred_path, all_true3d, activities)
        print(to_latex(metrics))
def main():
    """Compare predicted and ground-truth 3D poses and print a LaTeX row of metrics.

    Both pose sets are made relative to the joint at index 0. The printed
    numbers are the mean per-joint distance, the same after aligning the
    predictions with ``util3d.rigid_align_many``, and PCK / AUC computed on a
    fixed subset of joints.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--pred-path', type=str, default=None)
    for switch in ('--procrustes', '--acausal-smoothing', '--causal-smoothing'):
        arg_parser.add_argument(switch, action=options.YesNoAction)
    options.initialize(arg_parser)
    FLAGS.pred_path = util.ensure_absolute_path(
        FLAGS.pred_path, f'{paths.DATA_ROOT}/experiments')

    gt_by_relpath = get_all_gt_poses()
    pred_by_relpath = get_all_pred_poses()

    # Predictions are gathered in the iteration order of the ground truth so that rows match.
    all_pred3d = np.array([pred_by_relpath[relpath] for relpath in gt_by_relpath])
    all_true3d = np.array(list(gt_by_relpath.values()))

    # Express every pose relative to its joint at index 0.
    all_pred3d -= all_pred3d[:, :1]
    all_true3d -= all_true3d[:, :1]
    all_pred3d_aligned = util3d.rigid_align_many(
        all_pred3d, all_true3d, scale_align='rigid+scale')

    dist = np.linalg.norm(all_true3d - all_pred3d, axis=-1)
    dist_aligned = np.linalg.norm(all_true3d - all_pred3d_aligned, axis=-1)
    mpjpe = np.mean(dist)
    mpjpe_pa = np.mean(dist_aligned)

    # PCK and AUC only consider this subset of joints.
    major_joint_ids = [1, 2, 4, 5, 7, 8, 16, 17, 18, 19, 20, 21]
    major_dist = dist[:, major_joint_ids]
    pck = np.mean(major_dist / 50 <= 1) * 100
    binned_error = (np.floor(major_dist / 199 * 50) + 0.5) / 50
    auc = np.mean(np.maximum(0, 1 - binned_error)) * 100

    print('MPJPE & MPJPE_PA & PCK & AUC')
    print(to_latex([mpjpe, mpjpe_pa, pck, auc]))
def main():
    """Evaluate stored predictions per activity and per scene; print LaTeX rows.

    The scene of each image is derived from the test-subject number found in
    its path (``/TS<number>/``): consecutive pairs of subjects share a scene.
    With ``--seeds`` greater than 1 the mean and standard deviation over seeds
    are printed, otherwise the metrics of the single prediction file.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--pred-path', type=str, default=None)
    arg_parser.add_argument('--universal-skeleton', action=options.BoolAction)
    arg_parser.add_argument('--seeds', type=int, default=1)
    arg_parser.add_argument('--corrected-TS6', action=options.BoolAction, default=True)
    arg_parser.add_argument('--root-last', action=options.BoolAction, default=False)
    options.initialize(arg_parser)
    FLAGS.pred_path = util.ensure_absolute_path(
        FLAGS.pred_path, f'{paths.DATA_ROOT}/experiments')

    all_image_relpaths, all_true3d, activities = get_all_gt_poses()

    # Subjects 1-2, 3-4 and 5-6 map to the first, second and third scene name respectively.
    scenes_in_subject_order = ['green-screen', 'no-green-screen', 'outdoor']
    scene_list = []
    for image_path in all_image_relpaths:
        i_subject = int(re.search(r'/TS(\d+?)/', image_path)[1])
        scene_list.append(scenes_in_subject_order[(i_subject - 1) // 2])
    scene_names = np.array(scene_list)

    if FLAGS.seeds > 1:
        mean_per_seed, std_per_seed = evaluate_multiple_seeds(
            all_true3d, activities, scene_names)
        print(to_latex(mean_per_seed))
        print(to_latex(std_per_seed))
        return

    metrics = evaluate(FLAGS.pred_path, all_true3d, activities, scene_names)
    print(to_latex(metrics))
def main():
    """Evaluate stored predictions and print the result tables as LaTeX rows.

    The result of ``evaluate`` is indexed by (skeleton kind, root-relativity,
    bone rescaling) tuples and then by metric name; the selections printed here
    are labelled with the table they belong to.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--pred-path', type=str, default=None)
    arg_parser.add_argument('--seeds', type=int, default=1)
    options.initialize(arg_parser)
    FLAGS.pred_path = util.ensure_absolute_path(
        FLAGS.pred_path, f'{paths.DATA_ROOT}/experiments')

    all_true2d, all_true3d, all_true3d_univ = get_all_gt_poses()
    output = evaluate(FLAGS.pred_path, all_true2d, all_true3d, all_true3d_univ)

    # Keys of the evaluation settings used below.
    univ_rootrel = ('univ', 'rootrel', 'rescale')
    univ_absolute = ('univ', 'nonrootrel', 'rescale')
    plain_rootrel = ('nonuniv', 'rootrel', 'norescale')
    plain_absolute = ('nonuniv', 'nonrootrel', 'norescale')

    print(
        'Root-relative PCK for detected poses (normalized skeletons, bone rescaling) (Table 11)')
    print(to_latex(output[univ_rootrel]['pck_matched']))
    print()

    print('Absolute PCK for detected poses (normalized skeletons, bone rescaling) (Table 11)')
    print(to_latex(output[univ_absolute]['pck_matched']))
    print(to_latex(output[univ_absolute]['auc_matched']))
    print()

    print('Detected poses (unnormalized skeletons, no bone rescaling) (Table 8)')
    # The last entry of each metric vector is used, followed by the recall in percent.
    last_entry_selection = [
        (plain_absolute, 'mpjpe14'),
        (plain_rootrel, 'mpjpe14'),
        (plain_absolute, 'pck_matched'),
        (plain_rootrel, 'pck_matched'),
    ]
    numbers = [output[setting][metric][-1] for setting, metric in last_entry_selection]
    numbers.append(output['recall'] * 100)
    print(to_latex(numbers))
def imread_jpeg(path, dst=None):
    """Read the image at `path` with imageio.

    Args:
        path: image location as ``str``, ``numpy.str_`` or UTF-8 encoded ``bytes``.
            It is passed through ``util.ensure_absolute_path`` before reading.
        dst: must be ``None``. The parameter only exists for signature
            compatibility; this implementation cannot decode into a buffer.

    Returns:
        Whatever ``imageio.imread`` returns for the resolved path.

    Raises:
        AssertionError: if `dst` is not ``None``.
    """
    assert dst is None, 'decoding into a preallocated buffer is not supported here'

    if isinstance(path, bytes):
        path = path.decode('utf8')
    elif isinstance(path, str):
        # `np.str` was only an alias of the builtin `str` and has been removed from NumPy, so
        # referencing it raises AttributeError. Checking against `str` is equivalent and also
        # covers `numpy.str_` (a `str` subclass), which is converted to a plain `str` here.
        path = str(path)

    path = util.ensure_absolute_path(path)
    return imageio.imread(path)
def initialize_with_logfiles(parser, args=None):
    """Parse the command line into ``FLAGS`` and attach log handlers to ``logger``.

    Three arguments (``--logdir``, ``--file``, ``--loglevel``) are added to
    `parser` before parsing. Two log files are created in the log directory:
    ``log.txt`` (simple format, filtered by ``--loglevel``) and
    ``log_detailed.txt`` (detailed format, no handler-level filter). If stdout
    is a terminal, records are additionally printed through tqdm; otherwise the
    process-level stderr (file descriptor 2) is redirected into the detailed
    log file.

    Args:
        parser: the ``argparse.ArgumentParser`` to extend and parse with.
        args: the arguments to parse, as a list, as a single string (split with
            ``shlex.split``), or ``None`` to let argparse use ``sys.argv``.

    Raises:
        KeyError: if ``--loglevel`` is not one of error, warning, info, debug.
    """
    parser.add_argument('--logdir', type=str, default='default_logdir')
    parser.add_argument('--file', type=open, action=ParseFromFileAction)
    parser.add_argument('--loglevel', type=str, default='info')
    if isinstance(args, str):
        args = shlex.split(args)
    parser.parse_args(args=args, namespace=FLAGS)
    loglevel = dict(error=40, warning=30, info=20, debug=10)[FLAGS.loglevel]

    FLAGS.logdir = util.ensure_absolute_path(
        FLAGS.logdir, root=f'{paths.DATA_ROOT}/experiments')
    os.makedirs(FLAGS.logdir, exist_ok=True)
    simple_logfile_path = f'{FLAGS.logdir}/log.txt'
    detailed_logfile_path = f'{FLAGS.logdir}/log_detailed.txt'

    simple_logfile_handler = logging.FileHandler(simple_logfile_path)
    simple_logfile_handler.setLevel(loglevel)
    detailed_logfile_handler = logging.FileHandler(detailed_logfile_path)

    simple_formatter = logging.Formatter(
        '{asctime}-{levelname:^1.1} -- {message}', style='{')
    # Only the part of the host name before the first dot is logged.
    hostname = socket.gethostname().split('.', 1)[0]
    detailed_formatter = logging.Formatter(
        f'{{asctime}} - {hostname} - {{process}} - {{processName:^12.12}} -' +
        ' {threadName:^12.12} - {name:^12.12} - {levelname:^7.7} -- {message}', style='{')
    simple_logfile_handler.setFormatter(simple_formatter)
    detailed_logfile_handler.setFormatter(detailed_formatter)
    logger.addHandler(simple_logfile_handler)
    logger.addHandler(detailed_logfile_handler)

    if sys.stdout.isatty():
        # We only print the log messages to stdout if it's a terminal (tty).
        # Otherwise it goes to the log file.

        # Make sure that the log messages appear above the tqdm progress bars
        import tqdm

        class TQDMFile:
            def write(self, x):
                # Whitespace-only writes are dropped.
                if len(x.rstrip()) > 0:
                    tqdm.tqdm.write(x, file=sys.stdout)

        print_handler = logging.StreamHandler(TQDMFile())
        print_handler.setLevel(loglevel)
        print_handler.setFormatter(simple_formatter)
        logger.addHandler(print_handler)
    else:
        # Since we don't want to print the log to stdout, we also redirect stderr to the logfile
        # to save errors for future inspection. But stdout is still stdout.
        sys.stderr.flush()
        STDERR_FILENO = 2
        # After dup2, descriptor 2 refers to the log file on its own, so the temporary file
        # object can be closed right away instead of being left for the garbage collector.
        with open(detailed_logfile_path, 'ab+', 0) as new_err_file:
            os.dup2(new_err_file.fileno(), STDERR_FILENO)

    logger.setLevel(logging.DEBUG)
def imread_jpeg(path, dst=None):
    """Decode the JPEG file at `path` with jpeg4py.

    Args:
        path: image location as ``str``, ``numpy.str_`` or UTF-8 encoded ``bytes``.
            It is passed through ``util.ensure_absolute_path`` before decoding.
        dst: forwarded to ``jpeg4py.JPEG.decode``.

    Returns:
        The result of ``jpeg4py.JPEG(path).decode(dst)``.

    Raises:
        jpeg4py.JPEGRuntimeError: re-raised after the failing path has been logged.
    """
    if isinstance(path, bytes):
        path = path.decode('utf8')
    elif isinstance(path, str):
        # `np.str` was only an alias of the builtin `str` and has been removed from NumPy, so
        # referencing it raises AttributeError. Checking against `str` is equivalent and also
        # covers `numpy.str_` (a `str` subclass), which is converted to a plain `str` here.
        path = str(path)

    path = util.ensure_absolute_path(path)
    try:
        return jpeg4py.JPEG(path).decode(dst)
    except jpeg4py.JPEGRuntimeError:
        logger.error(f'Could not load image at {path}, JPEG error.')
        raise
def main():
    """Compare predicted and ground-truth 3D poses; print and store the metrics.

    Both pose sets are made relative to the joint at index 0. Besides the mean
    per-joint distance (with and without aligning the predictions through
    ``tfu3d.rigid_align``), PCK / AUC and the NCPS values are computed on a
    fixed subset of joints. The report is printed, written to
    ``<pred_path>/metrics`` and the pose arrays are saved to
    ``<pred_path>/arrays.npz``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--pred-path', type=str, default=None)
    for switch in ('--procrustes', '--acausal-smoothing', '--causal-smoothing'):
        arg_parser.add_argument(switch, action=options.BoolAction)
    options.initialize(arg_parser)
    FLAGS.pred_path = util.ensure_absolute_path(
        FLAGS.pred_path, f'{paths.DATA_ROOT}/experiments')

    gt_by_relpath = get_all_gt_poses()
    pred_by_relpath = get_all_pred_poses()

    # Predictions are gathered in the iteration order of the ground truth so that rows match.
    all_pred3d = np.array([pred_by_relpath[relpath] for relpath in gt_by_relpath])
    all_true3d = np.array(list(gt_by_relpath.values()))

    # Express every pose relative to its joint at index 0.
    all_pred3d -= all_pred3d[:, :1]
    all_true3d -= all_true3d[:, :1]
    all_pred3d_aligned = tfu3d.rigid_align(all_pred3d, all_true3d, scale_align=True)

    dist = np.linalg.norm(all_true3d - all_pred3d, axis=-1)
    dist_aligned = np.linalg.norm(all_true3d - all_pred3d_aligned, axis=-1)
    mpjpe = np.mean(dist)
    mpjpe_pa = np.mean(dist_aligned)

    # PCK, AUC and NCPS only consider this subset of joints.
    major_joint_ids = [1, 2, 4, 5, 7, 8, 16, 17, 18, 19, 20, 21]
    major_dist = dist[:, major_joint_ids]
    major_dist_pa = dist_aligned[:, major_joint_ids]

    # NCPS is based on the largest aligned error among the selected joints of each pose.
    max_dist_pa = np.max(major_dist_pa, axis=1)
    ncps_auc = np.mean(np.maximum(0, 1 - max_dist_pa / 300)) * 100
    ncps = [np.mean(max_dist_pa / t <= 1) * 100 for t in [50, 75, 100, 125, 150]]

    pck = np.mean(major_dist / 50 <= 1) * 100
    binned_error = (np.floor(major_dist / 199 * 50) + 0.5) / 50
    auc = np.mean(np.maximum(0, 1 - binned_error)) * 100

    report_parts = [
        'MPJPE & MPJPE_PA & PCK & AUC & NCPS & NCPS-AUC \n',
        to_latex([mpjpe, mpjpe_pa, pck, auc, ncps[3], ncps_auc]) + '\n',
        to_latex(ncps) + '\n',
    ]
    # Per-joint PCK at three thresholds.
    for per_joint_thresh in (50, 100, 150):
        per_joint_pck = np.mean(major_dist / per_joint_thresh <= 1, axis=0) * 100
        report_parts.append(str(per_joint_pck) + '\n')
    result = ''.join(report_parts)

    print(result)
    util.write_file(result, f'{FLAGS.pred_path}/metrics')
    np.savez(f'{FLAGS.pred_path}/arrays.npz', true=all_true3d, pred=all_pred3d)

    for thresh in [*range(50, 56), 60, 70, 80, 90, 100, 150, 200]:
        print(thresh, str(np.mean(major_dist / thresh <= 1) * 100))
def parse_and_set_global_flags():
    """Parse the command line into the module-level ``FLAGS`` and fill in defaults.

    The log directory is passed through ``util.ensure_absolute_path`` (with the
    experiments folder as root) and created. The test batch size defaults to the
    training batch size and the checkpoint directory to the log directory; the
    checkpoint directory is created as well.
    """
    # FLAGS is only mutated, never rebound, so no `global` declaration is needed.
    options.get_parser().parse_args(namespace=FLAGS)

    FLAGS.logdir = util.ensure_absolute_path(
        FLAGS.logdir, root=paths.DATA_ROOT + '/experiments')
    os.makedirs(FLAGS.logdir, exist_ok=True)

    fallbacks = (
        ('batch_size_test', 'batch_size'),
        ('checkpoint_dir', 'logdir'),
    )
    for flag_name, fallback_name in fallbacks:
        if getattr(FLAGS, flag_name) is None:
            setattr(FLAGS, flag_name, getattr(FLAGS, fallback_name))

    os.makedirs(FLAGS.checkpoint_dir, exist_ok=True)
def get_dataset(dataset_name):
    """Load a pickled dataset or build one with the matching ``make_<name>`` function.

    Args:
        dataset_name: either a path ending in ``.pkl`` (loaded with
            ``util.load_pickle``) or the name of a dataset for which this module
            defines a ``make_<dataset_name>`` function.

    Returns:
        The unpickled object, or the return value of the ``make_...`` function.
        The subject flags ``train_subjects`` / ``valid_subjects`` /
        ``test_subjects`` are passed on as tuples of ints when present and
        non-empty in ``FLAGS``.

    Raises:
        KeyError: if no ``make_<dataset_name>`` exists in this module's globals.
    """
    from options import FLAGS

    if dataset_name.endswith('.pkl'):
        pickle_path = util.ensure_absolute_path(dataset_name)
        return util.load_pickle(pickle_path)

    logger.debug(f'Making dataset {dataset_name}...')

    # Subject selections arrive as comma-separated strings such as '1,5,6'.
    maker_kwargs = {}
    for subj_key in ('train_subjects', 'valid_subjects', 'test_subjects'):
        raw_value = getattr(FLAGS, subj_key, None)
        if raw_value:
            maker_kwargs[subj_key] = tuple(map(int, raw_value.split(',')))

    make_dataset = globals()[f'make_{dataset_name}']
    return make_dataset(**maker_kwargs)
def initialize_with_logfiles(parser):
    """Parse the command line into ``FLAGS`` and configure the root logger.

    Three arguments (``--logdir``, ``--file``, ``--loglevel``) are added to
    `parser` before parsing ``sys.argv``. Two log files are created in the log
    directory: ``log.txt`` (simple format, filtered by ``--loglevel``) and
    ``log_detailed.txt`` (detailed format, no handler-level filter). If stdout
    is a terminal, records are additionally printed there; otherwise the
    process-level stderr (file descriptor 2) is redirected into the detailed
    log file.

    Args:
        parser: the ``argparse.ArgumentParser`` to extend and parse with.

    Raises:
        KeyError: if ``--loglevel`` is not one of error, warning, info, debug.
    """
    parser.add_argument('--logdir', type=str, default='default_logdir')
    parser.add_argument('--file', type=open, action=ParseFromFileAction)
    parser.add_argument('--loglevel', type=str, default='info')
    parser.parse_args(namespace=FLAGS)
    loglevel = dict(error=40, warning=30, info=20, debug=10)[FLAGS.loglevel]

    FLAGS.logdir = util.ensure_absolute_path(
        FLAGS.logdir, root=f'{paths.DATA_ROOT}/experiments')
    os.makedirs(FLAGS.logdir, exist_ok=True)
    simple_logfile_path = f'{FLAGS.logdir}/log.txt'
    detailed_logfile_path = f'{FLAGS.logdir}/log_detailed.txt'

    simple_logfile_handler = logging.FileHandler(simple_logfile_path)
    simple_logfile_handler.setLevel(loglevel)
    detailed_logfile_handler = logging.FileHandler(detailed_logfile_path)

    simple_formatter = logging.Formatter(
        '{asctime}-{levelname:^1.1} -- {message}', style='{')
    # Only the part of the host name before the first dot is logged.
    hostname = socket.gethostname().split('.', 1)[0]
    detailed_formatter = logging.Formatter(
        f'{{asctime}} - {hostname} - {{process}} - {{processName:^12.12}} -' +
        ' {threadName:^12.12} - {name:^12.12} - {levelname:^7.7} -- {message}', style='{')
    simple_logfile_handler.setFormatter(simple_formatter)
    detailed_logfile_handler.setFormatter(detailed_formatter)
    handlers = [simple_logfile_handler, detailed_logfile_handler]

    if sys.stdout.isatty():
        # We only print the log messages to stdout if it's a terminal (tty).
        # Otherwise it goes to the log file.
        print_handler = logging.StreamHandler(sys.stdout)
        print_handler.setLevel(loglevel)
        print_handler.setFormatter(simple_formatter)
        handlers.append(print_handler)
    else:
        # Since we don't want to print the log to stdout, we also redirect stderr to the logfile
        # to save errors for future inspection. But stdout is still stdout.
        sys.stderr.flush()
        STDERR_FILENO = 2
        # After dup2, descriptor 2 refers to the log file on its own, so the temporary file
        # object can be closed right away instead of being left for the garbage collector.
        with open(detailed_logfile_path, 'ab+', 0) as new_err_file:
            os.dup2(new_err_file.fileno(), STDERR_FILENO)

    logging.basicConfig(level=logging.DEBUG, handlers=handlers)
def load_and_transform3d(ex, joint_info, learning_phase, rng=None):
    """Load the image of a 3D example and reproject it to a square crop with augmentations.

    A copy of the example's camera is modified step by step (turned towards the
    box center, zoomed, optionally rotated and flipped) and the image, the
    optional foreground mask and the joint coordinates are all expressed through
    that new camera.

    Args:
        ex: the example; this function reads `bbox`, `camera`, `image_path`,
            `world_coords`, `univ_coords`, `activity_name`, `scene_name` and,
            if present, `mask`.
        joint_info: only `joint_info.mirror_mapping` is used, to reorder the
            joints after a horizontal flip.
        learning_phase: compared against `TRAIN` to decide which augmentations
            and which interpolation / antialiasing flags apply.
        rng: passed to `util.new_rng` to derive one generator per augmentation
            type.

    Returns:
        A 12-tuple: image path, processed image, camera-space joint coordinates,
        image-space joint coordinates, inverse intrinsic matrix of the new
        camera, rotation from the new to the original camera, transposed
        rotation of the new camera, `cam.t`, joint validity mask, field-of-view
        indicator per joint (as float32), activity name, scene name. NaNs in the
        two coordinate arrays are replaced with `np.nan_to_num`.
    """
    # One generator per augmentation type. The derivation order and the order of the draws
    # below determine the random stream, so neither should be changed casually.
    appearance_rng = util.new_rng(rng)
    background_rng = util.new_rng(rng)
    geom_rng = util.new_rng(rng)
    partial_visi_rng = util.new_rng(rng)

    output_side = FLAGS.proc_side
    output_imshape = (output_side, output_side)

    box = ex.bbox
    if FLAGS.partial_visibility:
        box = util.random_partial_subbox(boxlib.expand_to_square(box), partial_visi_rng)

    # NOTE(review): the box appears to be laid out as [x, y, width, height] (the last two
    # entries are compared as sizes below) -- confirm against boxlib.
    crop_side = np.max(box[2:])
    center_point = boxlib.center(box)
    # Random shift of the crop center; the condition is true when geom_aug is on and we are
    # either training or test-time augmentation is enabled.
    if ((learning_phase == TRAIN and FLAGS.geom_aug) or
            (learning_phase != TRAIN and FLAGS.test_aug and FLAGS.geom_aug)):
        center_point += util.random_uniform_disc(geom_rng) * FLAGS.shift_aug / 100 * crop_side

    # Two points on opposite sides of the center, half of the longer box side away from it.
    # Their distance after reprojection determines the zoom factor further down.
    if box[2] < box[3]:
        delta_y = np.array([0, box[3] / 2])
        sidepoints = center_point + np.stack([-delta_y, delta_y])
    else:
        delta_x = np.array([box[2] / 2, 0])
        sidepoints = center_point + np.stack([-delta_x, delta_x])

    # All camera changes are applied to a copy; ex.camera itself is only read.
    cam = ex.camera.copy()
    cam.turn_towards(target_image_point=center_point)
    cam.undistort()
    cam.square_pixels()
    # Carry the side points from the original image into the image of the new camera.
    world_sidepoints = ex.camera.image_to_world(sidepoints)
    cam_sidepoints = cam.world_to_image(world_sidepoints)
    crop_side = np.linalg.norm(cam_sidepoints[0] - cam_sidepoints[1])
    cam.zoom(output_side / crop_side)
    cam.center_principal_point(output_imshape)

    if FLAGS.geom_aug and (learning_phase == TRAIN or FLAGS.test_aug):
        # The scale flags are percentages, the rotation flag is in degrees.
        s1 = FLAGS.scale_aug_down / 100
        s2 = FLAGS.scale_aug_up / 100
        r = FLAGS.rot_aug * np.pi / 180
        zoom = geom_rng.uniform(1 - s1, 1 + s2)
        cam.zoom(zoom)
        cam.rotate(roll=geom_rng.uniform(-r, r))

    # The 3D target may use the universal skeleton, the 2D target always uses world_coords.
    world_coords = ex.univ_coords if FLAGS.universal_skeleton else ex.world_coords
    metric_world_coords = ex.world_coords

    # Random horizontal flip during training only; joints are reordered with the mirror
    # mapping in the flipped case.
    if learning_phase == TRAIN and geom_rng.rand() < 0.5:
        cam.horizontal_flip()
        camcoords = cam.world_to_camera(world_coords)[joint_info.mirror_mapping]
        metric_world_coords = metric_world_coords[joint_info.mirror_mapping]
    else:
        camcoords = cam.world_to_camera(world_coords)

    imcoords = cam.world_to_image(metric_world_coords)

    image_path = util.ensure_absolute_path(ex.image_path)
    origsize_im = improc.imread_jpeg(image_path)

    # Interpolation and antialiasing settings are configured separately for train and test.
    interp_str = (FLAGS.image_interpolation_train
                  if learning_phase == TRAIN else FLAGS.image_interpolation_test)
    antialias = (FLAGS.antialias_train if learning_phase == TRAIN else FLAGS.antialias_test)
    interp = getattr(cv2, 'INTER_' + interp_str.upper())
    im = cameralib.reproject_image(
        origsize_im, ex.camera, cam, output_imshape, antialias_factor=antialias, interp=interp)

    # Dataset-specific color adjustments, selected by patterns in the image path.
    if re.match('.+/mupots/TS[1-5]/.+', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
    elif '3dhp' in ex.image_path and re.match('.+/(TS[1-4])/', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
        im = improc.white_balance(im, 110, 145)

    # Background augmentation needs a foreground mask. Because of short-circuiting, the
    # random draw only happens when the flag is non-zero and a mask is available.
    if (FLAGS.background_aug_prob and hasattr(ex, 'mask') and ex.mask is not None and
            background_rng.rand() < FLAGS.background_aug_prob and
            (learning_phase == TRAIN or FLAGS.test_aug)):
        fgmask = improc.decode_mask(ex.mask)
        fgmask = cameralib.reproject_image(
            fgmask, ex.camera, cam, output_imshape, antialias_factor=antialias, interp=interp)
        im = augmentation.background.augment_background(im, fgmask, background_rng)

    im = augmentation.appearance.augment_appearance(im, learning_phase, appearance_rng)
    im = tfu.nhwc_to_std(im)
    im = improc.normalize01(im)

    # A joint is in the field of view if both of its image coordinates lie in [0, proc_side).
    is_joint_in_fov = ~np.logical_or(np.any(imcoords < 0, axis=-1),
                                     np.any(imcoords >= FLAGS.proc_side, axis=-1))
    # Joints with NaN coordinates are invalid
    joint_validity_mask = ~np.any(np.isnan(camcoords), axis=-1)

    rot_to_orig_cam = ex.camera.R @ cam.R.T
    rot_to_world = cam.R.T
    inv_intrinsics = np.linalg.inv(cam.intrinsic_matrix)

    return (
        ex.image_path, im, np.nan_to_num(camcoords).astype(np.float32),
        np.nan_to_num(imcoords).astype(np.float32), inv_intrinsics.astype(np.float32),
        rot_to_orig_cam.astype(np.float32), rot_to_world.astype(np.float32),
        cam.t.astype(np.float32), joint_validity_mask,
        np.float32(is_joint_in_fov), ex.activity_name, ex.scene_name)
def load_and_transform2d(example, joint_info, learning_phase, rng):
    """Load the image of a 2D example and reproject it to a square crop with augmentations.

    Args:
        example: the example; `image_path`, `bbox` and `coords` are read. The
            example is not modified by this function.
        joint_info: only `joint_info.mirror_mapping` is used, to reorder the
            joints after a horizontal flip.
        learning_phase: compared against `TRAIN` to select the interpolation and
            antialiasing flags; also forwarded to the appearance augmentation.
        rng: passed to `util.new_rng` to derive one generator per augmentation
            type.

    Returns:
        A 4-tuple: the example's image path, the processed image as float32, the
        joint image coordinates in the crop as float32 (NaNs replaced by
        `np.nan_to_num`) and a boolean mask that tells which joints are valid.
    """
    # Get the random number generators for the different augmentations to make it reproducible
    appearance_rng = util.new_rng(rng)
    geom_rng = util.new_rng(rng)
    partial_visi_rng = util.new_rng(rng)

    # Load the image
    image_path = util.ensure_absolute_path(example.image_path)
    im_from_file = improc.imread_jpeg(image_path)

    # Determine bounding box
    bbox = example.bbox
    if FLAGS.partial_visibility:
        bbox = util.random_partial_subbox(boxlib.expand_to_square(bbox), partial_visi_rng)

    # Only the size entries of the box define the crop side, as in the 3D loader. Taking the
    # maximum over the whole box would let a large x/y offset inflate the crop.
    crop_side = np.max(bbox[2:])
    center_point = boxlib.center(bbox)
    orig_cam = cameralib.Camera.create2D(im_from_file.shape)
    cam = orig_cam.copy()
    cam.zoom(FLAGS.proc_side / crop_side)

    if FLAGS.geom_aug:
        center_point += util.random_uniform_disc(geom_rng) * FLAGS.shift_aug / 100 * crop_side
        # The scale flags are percentages, the rotation flag is in degrees.
        s1 = FLAGS.scale_aug_down / 100
        s2 = FLAGS.scale_aug_up / 100
        cam.zoom(geom_rng.uniform(1 - s1, 1 + s2))
        r = FLAGS.rot_aug * np.pi / 180
        cam.rotate(roll=geom_rng.uniform(-r, r))

    if FLAGS.geom_aug and geom_rng.rand() < 0.5:
        # Horizontal flipping
        cam.horizontal_flip()
        # Must also permute the joints to exchange e.g. left wrist and right wrist!
        imcoords = example.coords[joint_info.mirror_mapping]
    else:
        imcoords = example.coords

    new_center_point = cameralib.reproject_image_points(center_point, orig_cam, cam)
    cam.shift_to_center(new_center_point, (FLAGS.proc_side, FLAGS.proc_side))

    # Work on a private copy: in the unflipped case `imcoords` is the example's own array, and
    # writing NaNs into it would silently alter the dataset for every later user.
    imcoords = imcoords.copy()
    # Annotations in the bottom 5% of the image height are treated as invalid.
    is_annotation_invalid = (np.nan_to_num(imcoords[:, 1]) > im_from_file.shape[0] * 0.95)
    imcoords[is_annotation_invalid] = np.nan
    imcoords = cameralib.reproject_image_points(imcoords, orig_cam, cam)

    # Interpolation and antialiasing settings are configured separately for train and test.
    interp_str = (FLAGS.image_interpolation_train
                  if learning_phase == TRAIN else FLAGS.image_interpolation_test)
    antialias = (FLAGS.antialias_train if learning_phase == TRAIN else FLAGS.antialias_test)
    interp = getattr(cv2, 'INTER_' + interp_str.upper())
    im = cameralib.reproject_image(
        im_from_file, orig_cam, cam, (FLAGS.proc_side, FLAGS.proc_side),
        antialias_factor=antialias, interp=interp)

    im = augmentation.appearance.augment_appearance(im, learning_phase, appearance_rng)
    im = tfu.nhwc_to_std(im)
    im = improc.normalize01(im)

    joint_validity_mask = ~np.any(np.isnan(imcoords), axis=1)
    # We must eliminate NaNs because some TensorFlow ops can't deal with any NaNs touching them,
    # even if they would not influence the result. Therefore we use a separate
    # "joint_validity_mask" to indicate which joint coords are valid.
    imcoords = np.nan_to_num(imcoords)
    return example.image_path, np.float32(im), np.float32(imcoords), joint_validity_mask
def load_and_transform3d(ex, joint_info, learning_phase, rng):
    """Load the image of a 3D example and reproject it to a square crop with augmentations.

    A copy of the example's camera is modified step by step (turned towards the
    box center, zoomed, optionally rotated and flipped) and the image, the
    optional foreground mask and the joint coordinates are all expressed through
    that new camera.

    Note that for examples whose image path contains 'sailvos', `ex.world_coords`
    and `ex.camera.t` are modified in place.

    Args:
        ex: the example; this function reads `bbox`, `camera`, `image_path`,
            `world_coords`, `univ_coords` and, if present, `mask`.
        joint_info: only `joint_info.mirror_mapping` is used, to reorder the
            joints after a horizontal flip.
        learning_phase: compared against `TRAIN` to decide which augmentations
            and which interpolation / antialiasing flags apply.
        rng: passed to `util.new_rng` to derive one generator per augmentation
            type.

    Returns:
        A dict with the keys `image`, `intrinsics`, `image_path`,
        `coords3d_true`, `coords2d_true`, `rot_to_orig_cam`, `rot_to_world`,
        `cam_loc`, `joint_validity_mask` and `is_joint_in_fov`. NaNs in the two
        coordinate arrays are replaced with `np.nan_to_num`.
    """
    # Get the random number generators for the different augmentations to make it reproducible.
    # The derivation order and the order of the draws below determine the random stream.
    appearance_rng = util.new_rng(rng)
    background_rng = util.new_rng(rng)
    geom_rng = util.new_rng(rng)
    partial_visi_rng = util.new_rng(rng)

    output_side = FLAGS.proc_side
    output_imshape = (output_side, output_side)

    if 'sailvos' in ex.image_path.lower():
        # This is needed in order not to lose precision in later operations.
        # Background: In the Sailvos dataset (GTA V), some world coordinates
        # are crazy large (several kilometers, i.e. millions of millimeters, which becomes
        # hard to process with the limited simultaneous dynamic range of float32).
        # They are stored in float64 but the processing is done in float32 here.
        # Both statements modify the example in place; once camera.t is zero, repeating them
        # has no further effect.
        ex.world_coords -= ex.camera.t
        ex.camera.t[:] = 0

    box = ex.bbox
    if 'surreal' in ex.image_path.lower():
        # Surreal images are flipped wrong in the official dataset release
        # The box is mirrored on a copy; 320 is presumably the Surreal image width -- TODO
        # confirm.
        box = box.copy()
        box[0] = 320 - (box[0] + box[2])

    # Partial visibility
    # Some dataset combinations use a fixed probability instead of the flag value.
    if 'surreal' in ex.image_path.lower() and 'surmuco' not in FLAGS.dataset:
        partial_visi_prob = 0.5
    elif 'h36m' in ex.image_path.lower() and 'many' in FLAGS.dataset:
        partial_visi_prob = 0.5
    else:
        partial_visi_prob = FLAGS.partial_visibility_prob

    # Because of short-circuiting, the random draw only happens in training or when test-time
    # augmentation is enabled.
    use_partial_visi_aug = ((learning_phase == TRAIN or FLAGS.test_aug) and
                            partial_visi_rng.rand() < partial_visi_prob)
    if use_partial_visi_aug:
        box = util.random_partial_subbox(boxlib.expand_to_square(box), partial_visi_rng)

    # Geometric transformation and augmentation
    crop_side = np.max(box[2:])
    center_point = boxlib.center(box)
    # Random shift of the crop center; the condition is true when geom_aug is on and we are
    # either training or test-time augmentation is enabled.
    if ((learning_phase == TRAIN and FLAGS.geom_aug) or
            (learning_phase != TRAIN and FLAGS.test_aug and FLAGS.geom_aug)):
        center_point += util.random_uniform_disc(geom_rng) * FLAGS.shift_aug / 100 * crop_side

    # The homographic reprojection of a rectangle (bounding box) will not be another rectangle
    # Hence, instead we transform the side midpoints of the short sides of the box and
    # determine an appropriate zoom factor by taking the projected distance of these two points
    # and scaling that to the desired output image side length.
    if box[2] < box[3]:
        # Tall box: take midpoints of top and bottom sides
        delta_y = np.array([0, box[3] / 2])
        sidepoints = center_point + np.stack([-delta_y, delta_y])
    else:
        # Wide box: take midpoints of left and right sides
        delta_x = np.array([box[2] / 2, 0])
        sidepoints = center_point + np.stack([-delta_x, delta_x])

    # All further camera changes are applied to a copy of the example's camera.
    cam = ex.camera.copy()
    cam.turn_towards(target_image_point=center_point)
    cam.undistort()
    cam.square_pixels()
    cam_sidepoints = cameralib.reproject_image_points(sidepoints, ex.camera, cam)
    crop_side = np.linalg.norm(cam_sidepoints[0] - cam_sidepoints[1])
    cam.zoom(output_side / crop_side)
    cam.center_principal_point(output_imshape)

    if FLAGS.geom_aug and (learning_phase == TRAIN or FLAGS.test_aug):
        # The scale flags are percentages, the rotation flag is in degrees.
        s1 = FLAGS.scale_aug_down / 100
        s2 = FLAGS.scale_aug_up / 100
        zoom = geom_rng.uniform(1 - s1, 1 + s2)
        cam.zoom(zoom)
        r = np.deg2rad(FLAGS.rot_aug)
        cam.rotate(roll=geom_rng.uniform(-r, r))

    # The 3D target may use the universal skeleton, the 2D target always uses world_coords.
    world_coords = ex.univ_coords if FLAGS.universal_skeleton else ex.world_coords
    metric_world_coords = ex.world_coords

    # Random horizontal flip during training only.
    if learning_phase == TRAIN and geom_rng.rand() < 0.5:
        cam.horizontal_flip()
        # Must reorder the joints due to left and right flip
        camcoords = cam.world_to_camera(world_coords)[joint_info.mirror_mapping]
        metric_world_coords = metric_world_coords[joint_info.mirror_mapping]
    else:
        camcoords = cam.world_to_camera(world_coords)

    imcoords = cam.world_to_image(metric_world_coords)

    # Load and reproject image
    image_path = util.ensure_absolute_path(ex.image_path)
    origsize_im = improc.imread_jpeg(image_path)
    if 'surreal' in ex.image_path.lower():
        # Surreal images are flipped wrong in the official dataset release
        origsize_im = origsize_im[:, ::-1]

    # Interpolation and antialiasing settings are configured separately for train and test.
    interp_str = (FLAGS.image_interpolation_train
                  if learning_phase == TRAIN else FLAGS.image_interpolation_test)
    antialias = (FLAGS.antialias_train if learning_phase == TRAIN else FLAGS.antialias_test)
    interp = getattr(cv2, 'INTER_' + interp_str.upper())
    im = cameralib.reproject_image(
        origsize_im, ex.camera, cam, output_imshape, antialias_factor=antialias, interp=interp)

    # Color adjustment
    # Selected by patterns in the image path.
    if re.match('.*mupots/TS[1-5]/.+', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
    elif '3dhp' in ex.image_path and re.match('.+/(TS[1-4])/', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
        im = improc.white_balance(im, 110, 145)
    elif 'panoptic' in ex.image_path.lower():
        im = improc.white_balance(im, 120, 138)

    # Background augmentation
    if hasattr(ex, 'mask') and ex.mask is not None:
        # Sailvos examples use a fixed probability, but the augmentation as a whole is still
        # switched off when the flag is zero.
        bg_aug_prob = 0.2 if 'sailvos' in ex.image_path.lower() else FLAGS.background_aug_prob
        if (FLAGS.background_aug_prob and (learning_phase == TRAIN or FLAGS.test_aug) and
                background_rng.rand() < bg_aug_prob):
            fgmask = improc.decode_mask(ex.mask)
            # NOTE(review): this check is case-sensitive, unlike the other 'surreal' checks in
            # this function, which lowercase the path first -- confirm this is intended.
            if 'surreal' in ex.image_path:
                # Surreal images are flipped wrong in the official dataset release
                fgmask = fgmask[:, ::-1]
            fgmask = cameralib.reproject_image(
                fgmask, ex.camera, cam, output_imshape, antialias_factor=antialias,
                interp=interp)
            im = augmentation.background.augment_background(im, fgmask, background_rng)

    # Occlusion and color augmentation
    im = augmentation.appearance.augment_appearance(
        im, learning_phase, FLAGS.occlude_aug_prob, appearance_rng)
    im = tfu.nhwc_to_std(im)
    im = improc.normalize01(im)

    # A joint is in the field of view if both of its image coordinates lie in [0, proc_side).
    is_joint_in_fov = ~np.logical_or(
        np.any(imcoords < 0, axis=-1), np.any(imcoords >= FLAGS.proc_side, axis=-1))
    # Joints with NaN coordinates are invalid
    joint_validity_mask = ~np.any(np.isnan(camcoords), axis=-1)

    rot_to_orig_cam = ex.camera.R @ cam.R.T
    rot_to_world = cam.R.T

    return dict(
        image=im,
        intrinsics=np.float32(cam.intrinsic_matrix),
        image_path=ex.image_path,
        coords3d_true=np.nan_to_num(camcoords).astype(np.float32),
        coords2d_true=np.nan_to_num(imcoords).astype(np.float32),
        rot_to_orig_cam=rot_to_orig_cam.astype(np.float32),
        rot_to_world=rot_to_world.astype(np.float32),
        cam_loc=cam.t.astype(np.float32),
        joint_validity_mask=joint_validity_mask,
        is_joint_in_fov=np.float32(is_joint_in_fov))
def make_efficient_example(ex, new_image_path, further_expansion_factor=1,
                           image_adjustments_3dhp=False, min_time=None):
    """Make example by storing the image in a cropped and resized version for efficient loading

    The crop is defined through a new camera. For 3D examples (those with a
    `world_coords` attribute) the new camera is a copy of the example's camera
    turned towards the box center and undistorted; for 2D examples a default 2D
    camera is used. The image is only regenerated if the file at
    `new_image_path` is not newer than `min_time` or is not readable.

    Args:
        ex: the source example. `ex.image_path` may be a path string or an
            already loaded image array.
        new_image_path: where the cropped image is stored; also becomes the
            `image_path` of the returned example.
        further_expansion_factor: if greater than 0, the crop box is expanded
            with `get_expanded_crop_box`; otherwise the reprojected box is used
            directly.
        image_adjustments_3dhp: if true, an extra gamma factor and a white
            balance step are applied to the stored image.
        min_time: forwarded to `util.is_file_newer`.

    Returns:
        A deep copy of `ex` with updated `bbox` and `image_path`, plus updated
        `camera` (3D) or `coords` (2D), and `mask` when the source has one.
    """
    is3d = hasattr(ex, 'world_coords')
    # Image width and height, from the file or from the array shape.
    w, h = (improc.image_extents(util.ensure_absolute_path(ex.image_path))
            if isinstance(ex.image_path, str)
            else (ex.image_path.shape[1], ex.image_path.shape[0]))
    full_box = boxlib.full_box(imsize=[w, h])

    if is3d:
        old_camera = ex.camera
        new_camera = ex.camera.copy()
        new_camera.turn_towards(target_image_point=boxlib.center(ex.bbox))
        new_camera.undistort()
    else:
        old_camera = cameralib.Camera.create2D()
        new_camera = old_camera.copy()

    reprojected_box = reproject_box(ex.bbox, old_camera, new_camera, method='side_midpoints')
    reprojected_full_box = reproject_box(full_box, old_camera, new_camera, method='corners')
    expanded_bbox = (
        get_expanded_crop_box(reprojected_box, reprojected_full_box, further_expansion_factor)
        if further_expansion_factor > 0 else reprojected_box)
    # The scale factor is capped at 1.2; below the cap, the larger side of the person box ends
    # up at 256 * 1.5 pixels (assuming the last two box entries are the sizes -- TODO confirm).
    scale_factor = min(1.2, 256 / np.max(reprojected_box[2:]) * 1.5)
    new_camera.shift_image(-expanded_bbox[:2])
    new_camera.scale_output(scale_factor)

    # The box is reprojected again because the new camera has been shifted and scaled since.
    reprojected_box = reproject_box(ex.bbox, old_camera, new_camera, method='side_midpoints')
    # Entries [3, 2] of the box are used as the destination shape, scaled and rounded.
    dst_shape = improc.rounded_int_tuple(scale_factor * expanded_bbox[[3, 2]])

    new_image_abspath = util.ensure_absolute_path(new_image_path)
    # Skip the expensive reprojection if an up-to-date, readable file already exists.
    if not (util.is_file_newer(new_image_abspath, min_time) and
            improc.is_image_readable(new_image_abspath)):
        im = improc.imread_jpeg(ex.image_path) if isinstance(ex.image_path, str) else ex.image_path
        # Disabled GPU path: host_im, cuda_im = get_memory(im.shape)
        # Resampling is done on values raised to the power 2.2 (presumably to interpolate in
        # linear light -- TODO confirm); the inverse power is applied again before saving.
        im = np.power((im.astype(np.float32) / 255), 2.2)
        # Disabled GPU path: cuda_im.upload(host_im)
        new_im = cameralib.reproject_image(
            im, old_camera, new_camera, dst_shape, antialias_factor=2, interp=cv2.INTER_CUBIC)
        new_im = np.clip(new_im, 0, 1)

        if image_adjustments_3dhp:
            # enhance the 3dhp images to reduce the green tint and increase brightness
            new_im = (new_im**(1 / 2.2 * 0.67) * 255).astype(np.uint8)
            new_im = improc.white_balance(new_im, 110, 145)
        else:
            new_im = (new_im**(1 / 2.2) * 255).astype(np.uint8)
        util.ensure_path_exists(new_image_abspath)
        imageio.imwrite(new_image_abspath, new_im, quality=95)
        assert improc.is_image_readable(new_image_abspath)

    new_ex = copy.deepcopy(ex)
    new_ex.bbox = reprojected_box
    new_ex.image_path = new_image_path
    if is3d:
        new_ex.camera = new_camera
    else:
        # 2D examples carry image coordinates, which must follow the crop.
        new_ex.coords = cameralib.reproject_image_points(new_ex.coords, old_camera, new_camera)

    if hasattr(ex, 'mask') and ex.mask is not None:
        if isinstance(ex.mask, str):
            # The mask is given as a file path and has to be cropped like the image.
            # NOTE(review): this branch still uses the `get_memory` / upload / download path
            # that is disabled for the image above, and it reads `ex.camera` rather than
            # `old_camera`, which only exists on 3D examples -- verify it is still functional.
            mask = improc.imread_jpeg(util.ensure_absolute_path(ex.mask))
            host_mask, cuda_mask = get_memory(mask.shape)
            np.divide(mask.astype(np.float32), 255, out=host_mask)
            cuda_mask.upload(host_mask)
            mask_reproj = cameralib.reproject_image(
                cuda_mask, ex.camera, new_camera, dst_shape, antialias_factor=2).download()
            # Binarize the first channel at 32 / 255 into a 0 / 255 uint8 mask.
            mask_reproj = 255 * (mask_reproj[..., 0] > 32 / 255).astype(np.uint8)
            new_ex.mask = get_connected_component_with_highest_iou(mask_reproj, reprojected_box)
        else:
            new_ex.mask = ex.mask
    return new_ex
def export():
    """Export the trained network as a SavedModel wrapped in a prediction class.

    The inference graph is built in graph mode, the checkpoint weights are
    restored and an intermediate SavedModel is written to the export path. That
    model is then reloaded in eager mode, the intermediate directory is removed
    and the wrapped model is saved to the same path.
    """
    logging.info('Exporting model file.')
    tf1 = tf.compat.v1
    tf1.reset_default_graph()

    t = attrdict.AttrDict()
    image_placeholder = tf1.placeholder(
        shape=[None, FLAGS.proc_side, FLAGS.proc_side, 3], dtype=tfu.get_dtype())
    t.x = image_placeholder
    t.x = tfu.nhwc_to_std(t.x)

    # Only the 'metrabs' model takes camera intrinsics as an input.
    is_absolute_model = FLAGS.scale_recovery in ('metrabs', )
    intrinsics_tensor = None
    if is_absolute_model:
        intrinsics_tensor = tf1.placeholder(shape=[None, 3, 3], dtype=tf.float32)
        t.inv_intrinsics = tf.linalg.inv(intrinsics_tensor)

    joint_info = data.datasets3d.get_dataset(FLAGS.dataset).joint_info

    scale_recovery = FLAGS.scale_recovery
    if scale_recovery == 'metrabs':
        model.metrabs.build_metrabs_inference_model(joint_info, t)
    elif scale_recovery == 'metro':
        model.metro.build_metro_inference_model(joint_info, t)
    else:
        model.twofive.build_25d_inference_model(joint_info, t)

    # Convert to the original joint order as defined in the original datasets
    # (i.e. put the pelvis back to its place from the last position,
    # because this codebase normally uses the last position for the pelvis in all cases for
    # consistency)
    dataset_name = FLAGS.dataset
    if dataset_name == 'many':
        if FLAGS.export_smpl:
            selected_joint_ids = [23] + list(range(23))
        else:
            selected_joint_ids = list(range(73))
    elif dataset_name == 'h36m':
        selected_joint_ids = [16] + list(range(16))
    else:
        assert FLAGS.dataset in ('mpi_inf_3dhp', 'mupots') or 'muco' in FLAGS.dataset
        selected_joint_ids = list(range(14)) + [17, 14, 15]

    t.coords3d_pred = tf.gather(t.coords3d_pred, selected_joint_ids, axis=1)
    joint_info = joint_info.select_joints(selected_joint_ids)

    # Use the explicitly given checkpoint, else the latest one recorded in the checkpoint dir.
    if FLAGS.load_path:
        load_path = util.ensure_absolute_path(FLAGS.load_path, FLAGS.checkpoint_dir)
    else:
        checkpoint_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        load_path = checkpoint_state.model_checkpoint_path

    # The export path is resolved relative to the directory of the restored checkpoint.
    out_path = util.ensure_absolute_path(FLAGS.export_file, os.path.dirname(load_path))

    sm = tf1.saved_model
    with tf1.Session() as sess:
        tf1.train.Saver().restore(sess, load_path)

        inputs = {'image': t.x}
        if is_absolute_model:
            inputs['intrinsics'] = intrinsics_tensor
        signature_def = sm.signature_def_utils.predict_signature_def(
            inputs=inputs, outputs={'poses': t.coords3d_pred})

        os.mkdir(out_path)
        builder = sm.builder.SavedModelBuilder(out_path)
        builder.add_meta_graph_and_variables(
            sess, ['serve'], signature_def_map={'serving_default': signature_def})
        builder.save()

    # Second stage: reload the raw model eagerly and save it again inside a wrapper object.
    tf1.reset_default_graph()
    tf1.enable_eager_execution()
    crop_model = tf.saved_model.load(out_path)
    shutil.rmtree(out_path)

    if is_absolute_model:
        wrapper_class = ExportedAbsoluteModel
    else:
        wrapper_class = ExportedRootRelativeModel
    wrapped_model = wrapper_class(crop_model, joint_info)
    tf.saved_model.save(wrapped_model, out_path)