def eval_direction_net_rotation(src_img, trt_img, rotation_gt,
                                n_output_distributions=3):
  """Evaluate the DirectionNet-R.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    n_output_distributions: (int) number of output distributions (either two
      or three). The model uses the 9D representation for rotations when it is
      3 and the 6D representation when it is 2.

  Returns:
    TensorFlow metrics.

  Raises:
    ValueError: 'n_output_distributions' must be either 2 or 3.
  """
  if n_output_distributions != 3 and n_output_distributions != 2:
    raise ValueError("'n_output_distributions' must be either 2 or 3.")

  net = model.DirectionNet(n_output_distributions)
  directions_gt = rotation_gt[:, :n_output_distributions]
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      directions_gt, tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(src_img, trt_img, training=False)
  directions, _, distribution_pred = util.distributions_to_directions(pred)
  if n_output_distributions == 3:
    rotation_estimated = util.svd_orthogonalize(directions)
  elif n_output_distributions == 2:
    rotation_estimated = util.gram_schmidt(directions)

  angular_errors = util.angular_distance(directions, directions_gt)
  x_error = tf.reduce_mean(angular_errors[:, 0])
  y_error = tf.reduce_mean(angular_errors[:, 1])
  # Note: indexing the z component assumes n_output_distributions == 3; the
  # 6D representation predicts only two direction distributions.
  z_error = tf.reduce_mean(angular_errors[:, 2])
  rotation_error = tf.reduce_mean(util.rotation_geodesic(
      rotation_estimated, rotation_gt))

  for i in range(n_output_distributions):
    tf.summary.image('distribution/rotation/ground_truth_%d' % (i + 1),
                     distribution_gt[:, :, :, i:i + 1],
                     max_outputs=4)
    tf.summary.image('distribution/rotation/prediction_%d' % (i + 1),
                     distribution_pred[:, :, :, i:i + 1],
                     max_outputs=4)
  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)

  metrics_to_values, metrics_to_updates = (
      metrics.aggregate_metric_map({
          'angular_error/x': tf.metrics.mean(
              util.radians_to_degrees(x_error)),
          'angular_error/y': tf.metrics.mean(
              util.radians_to_degrees(y_error)),
          'angular_error/z': tf.metrics.mean(
              util.radians_to_degrees(z_error)),
          'rotation_error': tf.metrics.mean(
              util.radians_to_degrees(rotation_error)),
          'rotation_error/median': streaming_median_metric(
              tf.reshape(util.radians_to_degrees(rotation_error), (1,)))
      }))
  return metrics_to_values, metrics_to_updates
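
# A minimal sketch (not the repo's `util.svd_orthogonalize`) of the projection
# used above for the 9D representation: the three predicted direction vectors,
# stacked as rows of a 3x3 matrix, are mapped to the closest rotation matrix
# in the Frobenius norm (special orthogonal Procrustes). The helper name, the
# row-stacking convention, and the single-example (unbatched) input are
# assumptions for illustration only.
def _example_svd_orthogonalize(directions_3x3):
  """Projects a [3, 3] matrix onto SO(3); illustration only."""
  import numpy as np
  u, _, vt = np.linalg.svd(np.asarray(directions_3x3, np.float64))
  # Flip the last singular direction if needed so that det(R) = +1.
  d = np.sign(np.linalg.det(np.matmul(u, vt)))
  return np.matmul(np.matmul(u, np.diag([1., 1., d])), vt)
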
def eval_direction_net_single(src_img, trt_img, rotation_gt, translation_gt):
  """Evaluate the DirectionNet-Single.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    translation_gt: [BATCH, 3] ground truth translation directions.

  Returns:
    TensorFlow metrics.
  """
  net = model.DirectionNet(4)
  translation_gt = tf.expand_dims(translation_gt, 1)
  directions_gt = tf.concat([rotation_gt, translation_gt], 1)
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      directions_gt, tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(src_img, trt_img, training=False)
  directions, _, distribution_pred = util.distributions_to_directions(pred)
  rotation_estimated = util.svd_orthogonalize(directions[:, :3])

  angular_errors = util.angular_distance(directions, directions_gt)
  x_error = tf.reduce_mean(angular_errors[:, 0])
  y_error = tf.reduce_mean(angular_errors[:, 1])
  z_error = tf.reduce_mean(angular_errors[:, 2])
  translation_error = tf.reduce_mean(angular_errors[:, 3])
  rotation_error = tf.reduce_mean(util.rotation_geodesic(
      rotation_estimated, rotation_gt))

  # The first three distributions are the rotation directions; the last one is
  # the translation direction and is logged separately below.
  for i in range(3):
    tf.summary.image('distribution/rotation/ground_truth_%d' % (i + 1),
                     distribution_gt[:, :, :, i:i + 1],
                     max_outputs=4)
    tf.summary.image('distribution/rotation/prediction_%d' % (i + 1),
                     distribution_pred[:, :, :, i:i + 1],
                     max_outputs=4)
  tf.summary.image('distribution/translation/ground_truth',
                   distribution_gt[:, :, :, -1:],
                   max_outputs=4)
  tf.summary.image('distribution/translation/prediction',
                   distribution_pred[:, :, :, -1:],
                   max_outputs=4)
  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)

  metrics_to_values, metrics_to_updates = (
      metrics.aggregate_metric_map({
          'angular_error/x': tf.metrics.mean(
              util.radians_to_degrees(x_error)),
          'angular_error/y': tf.metrics.mean(
              util.radians_to_degrees(y_error)),
          'angular_error/z': tf.metrics.mean(
              util.radians_to_degrees(z_error)),
          'rotation_error': tf.metrics.mean(
              util.radians_to_degrees(rotation_error)),
          'rotation_error/median': streaming_median_metric(
              tf.reshape(util.radians_to_degrees(rotation_error), (1,))),
          'translation_error': tf.metrics.mean(
              util.radians_to_degrees(translation_error)),
          'translation_error/median': streaming_median_metric(
              tf.reshape(util.radians_to_degrees(translation_error), (1,)))
      }))
  return metrics_to_values, metrics_to_updates
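
# A minimal sketch, assuming an equirectangular parameterization of the
# sphere, of how a ground-truth spherical distribution like `distribution_gt`
# above can be built: a von Mises-Fisher density exp(kappa * <mu, v>) sampled
# on a (height, width) grid and normalized with sin-weighted pixel areas. The
# axis and pixel-center conventions here are assumptions and may differ from
# `util.von_mises_fisher` / `util.spherical_normalization`; `mu` is assumed to
# be a unit 3-vector.
def _example_von_mises_fisher_grid(mu, kappa, height, width):
  """Returns a [height, width] vMF density over the sphere; illustration."""
  import numpy as np
  # Colatitude in (0, pi) and longitude in [-pi, pi), one sample per pixel.
  theta = (np.arange(height) + 0.5) / height * np.pi
  phi = (np.arange(width) + 0.5) / width * 2. * np.pi - np.pi
  theta, phi = np.meshgrid(theta, phi, indexing='ij')
  # Unit direction vector for every pixel (z up, x toward phi = 0).
  v = np.stack([np.sin(theta) * np.cos(phi),
                np.sin(theta) * np.sin(phi),
                np.cos(theta)], axis=-1)
  density = np.exp(kappa * v.dot(np.asarray(mu, np.float64)))
  # Normalize so the density integrates to one over the sphere; sin(theta)
  # accounts for the non-uniform solid angle of equirectangular pixels.
  pixel_area = (np.pi / height) * (2. * np.pi / width) * np.sin(theta)
  return density / np.sum(density * pixel_area)
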
def eval_direction_net_translation(src_img, trt_img, rotation_gt,
                                   translation_gt, fov_gt, rotation_pred,
                                   derotate_both=False):
  """Evaluate the DirectionNet-T.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    translation_gt: [BATCH, 3] ground truth translation directions.
    fov_gt: [BATCH] the ground truth field of view (degrees) of input images.
    rotation_pred: [BATCH, 3, 3] estimated rotations from DirectionNet-R.
    derotate_both: (bool) if true, transform both input images to a middle
      frame by half the relative rotation between them to cancel out the
      rotation. Otherwise, only derotate the target image to the source
      image's frame.

  Returns:
    TensorFlow metrics.
  """
  net = model.DirectionNet(1)
  (transformed_src, transformed_trt) = util.derotation(
      src_img,
      trt_img,
      rotation_pred,
      fov_gt,
      FLAGS.transformed_fov,
      [FLAGS.transformed_height, FLAGS.transformed_width],
      derotate_both)
  (transformed_src_gt, transformed_trt_gt) = util.derotation(
      src_img,
      trt_img,
      rotation_gt,
      fov_gt,
      FLAGS.transformed_fov,
      [FLAGS.transformed_height, FLAGS.transformed_width],
      derotate_both)

  translation_gt = tf.expand_dims(translation_gt, 1)
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      translation_gt, tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(transformed_src, transformed_trt, training=False)
  directions, _, distribution_pred = util.distributions_to_directions(pred)
  half_derotation = util.half_rotation(rotation_pred)
  # The output directions are relative to the derotated frame. Transform them
  # back to the source images' frame.
  directions = tf.matmul(directions, half_derotation, transpose_b=True)
  translation_error = tf.reduce_mean(tf.acos(tf.clip_by_value(
      tf.reduce_sum(directions * translation_gt, -1), -1., 1.)))

  tf.summary.image('distribution/translation/ground_truth',
                   distribution_gt,
                   max_outputs=4)
  tf.summary.image('distribution/translation/prediction',
                   distribution_pred,
                   max_outputs=4)
  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)
  tf.summary.image('transformed_source_image', transformed_src, max_outputs=4)
  tf.summary.image('transformed_target_image', transformed_trt, max_outputs=4)
  tf.summary.image(
      'transformed_source_image_gt', transformed_src_gt, max_outputs=4)
  tf.summary.image(
      'transformed_target_image_gt', transformed_trt_gt, max_outputs=4)

  metrics_to_values, metrics_to_updates = (
      metrics.aggregate_metric_map({
          'translation_error': tf.metrics.mean(
              util.radians_to_degrees(translation_error)),
          'translation_error/median': streaming_median_metric(
              tf.reshape(util.radians_to_degrees(translation_error), (1,)))
      }))
  return metrics_to_values, metrics_to_updates
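
# The three eval builders above all return (metrics_to_values,
# metrics_to_updates) dictionaries built from `tf.metrics`. A minimal sketch of
# how such dictionaries are typically driven in a TF1 evaluation loop follows.
# The input pipeline (`batch` and its keys) and the step count are placeholder
# assumptions, not part of this module, and a real run would restore a trained
# checkpoint rather than freshly initialize the network variables.
def _example_run_eval_loop(batch, num_eval_steps=100):
  """Runs the streaming metric updates and reads the final values."""
  values, updates = eval_direction_net_rotation(
      batch['src_image'], batch['trt_image'], batch['rotation'])
  with tf.Session() as sess:
    # Streaming metrics keep their accumulators in local variables.
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    for _ in range(num_eval_steps):
      sess.run(list(updates.values()))
    return sess.run(values)
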
def direction_net_rotation(src_img, trt_img, rotation_gt,
                           n_output_distributions=3):
  """Build the computation graph to train the DirectionNet-R.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    n_output_distributions: (int) number of output distributions (either two
      or three). The model uses the 9D representation for rotations when it is
      3 and the 6D representation when it is 2.

  Returns:
    A collection of tensors including training ops, loss, and global step
    count.

  Raises:
    ValueError: 'n_output_distributions' must be either 2 or 3.
  """
  if n_output_distributions != 3 and n_output_distributions != 2:
    raise ValueError("'n_output_distributions' must be either 2 or 3.")

  net = model.DirectionNet(n_output_distributions)
  global_step = tf.train.get_or_create_global_step()
  directions_gt = rotation_gt[:, :n_output_distributions]
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      directions_gt, tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(src_img, trt_img, training=True)
  directions, expectation, distribution_pred = util.distributions_to_directions(
      pred)
  if n_output_distributions == 3:
    rotation_estimated = util.svd_orthogonalize(directions)
  elif n_output_distributions == 2:
    rotation_estimated = util.gram_schmidt(directions)

  direction_loss = losses.direction_loss(directions, directions_gt)
  distribution_loss = tf.constant(
      FLAGS.alpha, tf.float32) * losses.distribution_loss(
          distribution_pred, distribution_gt)
  spread_loss = tf.cast(
      FLAGS.beta, tf.float32) * losses.spread_loss(expectation)
  rotation_error = tf.reduce_mean(
      util.rotation_geodesic(rotation_estimated, rotation_gt))
  direction_error = tf.reduce_mean(
      tf.acos(
          tf.clip_by_value(
              tf.reduce_sum(directions * directions_gt, -1), -1., 1.)))
  loss = direction_loss + distribution_loss + spread_loss

  tf.summary.scalar('loss', loss)
  tf.summary.scalar('distribution_loss', distribution_loss)
  tf.summary.scalar('spread_loss', spread_loss)
  tf.summary.scalar('direction_error',
                    util.radians_to_degrees(direction_error))
  tf.summary.scalar('rotation_error',
                    util.radians_to_degrees(rotation_error))
  for i in range(n_output_distributions):
    tf.summary.image('distribution/rotation/ground_truth_%d' % (i + 1),
                     distribution_gt[:, :, :, i:i + 1],
                     max_outputs=4)
    tf.summary.image('distribution/rotation/prediction_%d' % (i + 1),
                     distribution_pred[:, :, :, i:i + 1],
                     max_outputs=4)
  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)

  optimizer = tf.train.GradientDescentOptimizer(FLAGS.lr)
  train_op = optimizer.minimize(loss, global_step=global_step, name='train')
  update_op = net.updates
  return Computation(tf.group([train_op, update_op]), loss, global_step)
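
# A minimal sketch (not the repo's `util.gram_schmidt`) of how the 6D
# representation used above when n_output_distributions == 2 can be turned
# into a full rotation: orthonormalize the two predicted direction vectors and
# complete the basis with their cross product. The function name, the
# row-vector convention, and the unbatched input are assumptions for
# illustration only.
def _example_gram_schmidt_6d(x_axis, y_axis):
  """Builds a 3x3 rotation from two roughly orthogonal 3-vectors; sketch."""
  import numpy as np
  x = np.asarray(x_axis, np.float64)
  x = x / np.linalg.norm(x)
  y = np.asarray(y_axis, np.float64)
  # Remove the component of y along x, then normalize.
  y = y - np.dot(x, y) * x
  y = y / np.linalg.norm(y)
  # The third axis follows from the right-handed cross product.
  z = np.cross(x, y)
  return np.stack([x, y, z], axis=0)
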
def direction_net_single(src_img, trt_img, rotation_gt, translation_gt):
  """Build the computation graph to train the DirectionNet-Single.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    translation_gt: [BATCH, 3] ground truth translation directions.

  Returns:
    A collection of tensors including training ops, loss, and global step
    count.
  """
  net = model.DirectionNet(4)
  global_step = tf.train.get_or_create_global_step()
  # Expand translation_gt to [BATCH, 1, 3] so it can be concatenated with the
  # [BATCH, 3, 3] rotation rows into four ground truth directions.
  translation_gt = tf.expand_dims(translation_gt, 1)
  directions_gt = tf.concat([rotation_gt, translation_gt], 1)
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      directions_gt, tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(src_img, trt_img, training=True)
  directions, expectation, distribution_pred = util.distributions_to_directions(
      pred)
  rotation_estimated = util.svd_orthogonalize(directions[:, :3])

  direction_loss = losses.direction_loss(directions, directions_gt)
  distribution_loss = tf.constant(
      FLAGS.alpha, tf.float32) * losses.distribution_loss(
          distribution_pred, distribution_gt)
  spread_loss = tf.cast(
      FLAGS.beta, tf.float32) * losses.spread_loss(expectation)
  rotation_error = tf.reduce_mean(
      util.rotation_geodesic(rotation_estimated, rotation_gt))
  translation_error = tf.reduce_mean(
      tf.acos(
          tf.clip_by_value(
              tf.reduce_sum(directions[:, -1] * directions_gt[:, -1], -1),
              -1., 1.)))
  direction_error = tf.reduce_mean(
      tf.acos(
          tf.clip_by_value(
              tf.reduce_sum(directions * directions_gt, -1), -1., 1.)))
  loss = direction_loss + distribution_loss + spread_loss

  tf.summary.scalar('loss', loss)
  tf.summary.scalar('distribution_loss', distribution_loss)
  tf.summary.scalar('spread_loss', spread_loss)
  tf.summary.scalar('direction_error',
                    util.radians_to_degrees(direction_error))
  tf.summary.scalar('rotation_error',
                    util.radians_to_degrees(rotation_error))
  tf.summary.scalar('translation_error',
                    util.radians_to_degrees(translation_error))
  for i in range(3):
    tf.summary.image('distribution/rotation/ground_truth_%d' % (i + 1),
                     distribution_gt[:, :, :, i:i + 1],
                     max_outputs=4)
    tf.summary.image('distribution/rotation/prediction_%d' % (i + 1),
                     distribution_pred[:, :, :, i:i + 1],
                     max_outputs=4)
  tf.summary.image('distribution/translation/ground_truth',
                   distribution_gt[:, :, :, -1:],
                   max_outputs=4)
  tf.summary.image('distribution/translation/prediction',
                   distribution_pred[:, :, :, -1:],
                   max_outputs=4)
  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)

  optimizer = tf.train.GradientDescentOptimizer(FLAGS.lr)
  train_op = optimizer.minimize(loss, global_step=global_step, name='train')
  update_op = net.updates
  return Computation(tf.group([train_op, update_op]), loss, global_step)

def direction_net_translation(src_img, trt_img, rotation_gt, translation_gt,
                              fov_gt, rotation_pred, derotate_both=False):
  """Build the computation graph to train the DirectionNet-T.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    translation_gt: [BATCH, 3] ground truth translation directions.
    fov_gt: [BATCH] the ground truth field of view (degrees) of input images.
    rotation_pred: [BATCH, 3, 3] estimated rotations from DirectionNet-R.
    derotate_both: (bool) if true, transform both input images to a middle
      frame by half the relative rotation between them to cancel out the
      rotation. Otherwise, only derotate the target image to the source
      image's frame.

  Returns:
    A collection of tensors including training ops, loss, and global step
    count.
  """
  net = model.DirectionNet(1)
  global_step = tf.train.get_or_create_global_step()
  # Randomly alternate between the estimated rotation and a perturbed ground
  # truth rotation when derotating the inputs.
  perturbed_rotation = tf.cond(
      tf.less(tf.random_uniform([], 0, 1.0), 0.5),
      lambda: util.perturb_rotation(rotation_gt, [10., 5., 10.]),
      lambda: rotation_pred)
  (transformed_src, transformed_trt) = util.derotation(
      src_img,
      trt_img,
      perturbed_rotation,
      fov_gt,
      FLAGS.transformed_fov,
      [FLAGS.transformed_height, FLAGS.transformed_width],
      derotate_both)
  (transformed_src_gt, transformed_trt_gt) = util.derotation(
      src_img,
      trt_img,
      rotation_gt,
      fov_gt,
      FLAGS.transformed_fov,
      [FLAGS.transformed_height, FLAGS.transformed_width],
      derotate_both)

  # Rotate the ground truth translation into the derotated frame in which the
  # network makes its prediction.
  half_derotation = util.half_rotation(perturbed_rotation)
  translation_gt = tf.squeeze(tf.matmul(
      half_derotation, tf.expand_dims(translation_gt, -1),
      transpose_a=True), -1)
  translation_gt = tf.expand_dims(translation_gt, 1)
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      translation_gt, tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(transformed_src, transformed_trt, training=True)
  directions, expectation, distribution_pred = util.distributions_to_directions(
      pred)

  direction_loss = losses.direction_loss(directions, translation_gt)
  distribution_loss = tf.constant(
      FLAGS.alpha, tf.float32) * losses.distribution_loss(
          distribution_pred, distribution_gt)
  spread_loss = tf.cast(
      FLAGS.beta, tf.float32) * losses.spread_loss(expectation)
  direction_error = tf.reduce_mean(
      tf.acos(
          tf.clip_by_value(
              tf.reduce_sum(directions * translation_gt, -1), -1., 1.)))
  loss = direction_loss + distribution_loss + spread_loss

  tf.summary.scalar('loss', loss)
  tf.summary.scalar('distribution_loss', distribution_loss)
  tf.summary.scalar('spread_loss', spread_loss)
  tf.summary.scalar('direction_error',
                    util.radians_to_degrees(direction_error))
  tf.summary.image('distribution/translation/ground_truth',
                   distribution_gt,
                   max_outputs=4)
  tf.summary.image('distribution/translation/prediction',
                   distribution_pred,
                   max_outputs=4)
  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)
  tf.summary.image('transformed_source_image', transformed_src, max_outputs=4)
  tf.summary.image('transformed_target_image', transformed_trt, max_outputs=4)
  tf.summary.image(
      'transformed_source_image_gt', transformed_src_gt, max_outputs=4)
  tf.summary.image(
      'transformed_target_image_gt', transformed_trt_gt, max_outputs=4)

  optimizer = tf.train.GradientDescentOptimizer(FLAGS.lr)
  train_op = optimizer.minimize(loss, global_step=global_step, name='train')
  update_op = net.updates
  return Computation(tf.group([train_op, update_op]), loss, global_step)
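
# A minimal sketch of how the Computation tuples returned by the training
# builders above can be driven. The input pipeline (`batch` and its keys), the
# checkpoint directory, and the step count are placeholder assumptions, not
# part of this module; the single and translation builders are used the same
# way as DirectionNet-R here.
def _example_run_training(batch, train_dir, num_steps=1000):
  """Runs the DirectionNet-R training op in a monitored session."""
  computation = direction_net_rotation(
      batch['src_image'], batch['trt_image'], batch['rotation'])
  with tf.train.MonitoredTrainingSession(
      checkpoint_dir=train_dir,
      hooks=[tf.train.StopAtStepHook(last_step=num_steps)]) as sess:
    while not sess.should_stop():
      # The first element of the Computation tuple is the grouped optimizer
      # step and network update ops; each call runs one training step.
      sess.run(computation[0])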