def rotation_matrix(self, plane):
  i = plane.first_axis
  j = plane.second_axis
  # 'plane.theta' holds the cosine of the rotation angle directly; the sine is
  # recovered via sin(acos(.)) with its gradient stopped.
  cos_theta = plane.theta
  sin_theta = tf.stop_gradient(tf.sin(tf.acos(cos_theta)))
  # Dense equivalent:
  # rotation = np.eye(self.dim).tolist()
  # rotation[i][i] = cos_theta
  # rotation[i][j] = -sin_theta
  # rotation[j][i] = sin_theta
  # rotation[j][j] = cos_theta
  # rotation = tf.stack(rotation)
  idx = []
  values = []
  for k in range(self.dim):
    idx.append([k, k])
    if k == i or k == j:
      values.append(cos_theta)
    else:
      values.append(1.0)
  idx.append([i, j])
  values.append(-sin_theta)
  idx.append([j, i])
  values.append(sin_theta)
  # Indices are not in row-major order; reorder with tf.sparse.reorder before
  # ops that require canonical ordering.
  rotation = tf.SparseTensor(idx, values=tf.stack(values),
                             dense_shape=[self.dim, self.dim])
  return rotation
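# Usage sketch (hypothetical; 'Plane' and 'Rotor' are stand-ins for whatever
# classes the snippet above belongs to): build a 4x4 Givens rotation in the
# (0, 2) plane and densify it for inspection.
import collections
import tensorflow as tf

Plane = collections.namedtuple('Plane', ['first_axis', 'second_axis', 'theta'])

class Rotor(object):
  def __init__(self, dim):
    self.dim = dim
  rotation_matrix = rotation_matrix  # reuse the function above as a method

plane = Plane(first_axis=0, second_axis=2,
              theta=tf.constant(0.5))  # cos(theta) = 0.5, i.e. 60 degrees
rotation = Rotor(4).rotation_matrix(plane)
dense = tf.sparse.to_dense(tf.sparse.reorder(rotation))
with tf.Session() as sess:
  print(sess.run(dense))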
def relative_angle(r1, r2):
  """Relative angle (radians) between 3D rotation matrices."""
  rel_rot = tf.matmul(tf.transpose(r1, perm=[0, 2, 1]), r2)
  trace = rel_rot[:, 0, 0] + rel_rot[:, 1, 1] + rel_rot[:, 2, 2]
  cos_theta = (trace - 1.0) / 2.0
  cos_theta = tf.minimum(cos_theta, tf.ones_like(cos_theta))
  cos_theta = tf.maximum(cos_theta, (-1.0) * tf.ones_like(cos_theta))
  theta = tf.acos(cos_theta)
  return theta
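# Quick numeric check (a sketch, not part of the original source): the angle
# between the identity and a 90-degree rotation about z should be pi/2.
import numpy as np
import tensorflow as tf

eye = tf.constant(np.eye(3)[np.newaxis], tf.float32)
rot_z90 = tf.constant([[[0., -1., 0.],
                        [1., 0., 0.],
                        [0., 0., 1.]]], tf.float32)
with tf.Session() as sess:
  print(sess.run(relative_angle(eye, rot_z90)))  # ~[1.5708]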
def build_graph(self):
  # Placeholders take a batch of raw strings.
  self.sts_input1 = tf.placeholder(tf.string, shape=(None,))
  self.sts_input2 = tf.placeholder(tf.string, shape=(None,))
  sts_encode1 = tf.nn.l2_normalize(self.embed(self.sts_input1), axis=1)
  sts_encode2 = tf.nn.l2_normalize(self.embed(self.sts_input2), axis=1)
  self.cosine_similarities = tf.reduce_sum(
      tf.multiply(sts_encode1, sts_encode2), axis=1)
  clip_cosine_similarities = tf.clip_by_value(
      self.cosine_similarities, -1.0, 1.0)
  # Angular similarity: acos maps the clipped cosine to [0, pi], so higher
  # scores mean more similar sentences.
  self.sim_scores = 1.0 - tf.acos(clip_cosine_similarities)
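# Usage sketch. Assumptions: 'self.embed' is a TF-Hub sentence encoder such as
# the Universal Sentence Encoder, and the wrapper class name 'StsModel' is
# hypothetical.
import tensorflow as tf
import tensorflow_hub as hub

class StsModel(object):
  def __init__(self):
    self.embed = hub.Module(
        'https://tfhub.dev/google/universal-sentence-encoder/2')
  build_graph = build_graph  # reuse the method above

model = StsModel()
model.build_graph()
with tf.Session() as sess:
  sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
  scores = sess.run(model.sim_scores, feed_dict={
      model.sts_input1: ['a cat sits on the mat'],
      model.sts_input2: ['a kitten rests on the rug']})
  print(scores)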
def rotation_geodesic(r1, r2):
  """Return the geodesic distance (angle in radians) between two rotations.

  Args:
    r1: [BATCH, 3, 3] rotation matrices.
    r2: [BATCH, 3, 3] rotation matrices.

  Returns:
    [BATCH] radian angular difference between rotation matrices.
  """
  diff = (tf.trace(tf.matmul(r1, r2, transpose_b=True)) - 1) / 2
  angular_diff = tf.acos(tf.clip_by_value(diff, -1., 1.))
  return angular_diff
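# Sanity sketch: rotation_geodesic implements theta = acos((tr(R1 R2^T) - 1)/2),
# the same geodesic metric as relative_angle above, so the two should agree.
import numpy as np
import tensorflow as tf

angle = np.pi / 6.
rot_x30 = tf.constant([[[1., 0., 0.],
                        [0., np.cos(angle), -np.sin(angle)],
                        [0., np.sin(angle), np.cos(angle)]]], tf.float32)
eye = tf.constant(np.eye(3)[np.newaxis], tf.float32)
with tf.Session() as sess:
  print(sess.run(rotation_geodesic(eye, rot_x30)))  # ~[0.5236]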
def batch_rot2aa(Rs):
  """
  Rs is B x 3 x 3
  void cMathUtil::RotMatToAxisAngle(const tMatrix& mat, tVector& out_axis,
                                    double& out_theta)
  {
      double c = 0.5 * (mat(0, 0) + mat(1, 1) + mat(2, 2) - 1);
      c = cMathUtil::Clamp(c, -1.0, 1.0);

      out_theta = std::acos(c);

      if (std::abs(out_theta) < 0.00001)
      {
          out_axis = tVector(0, 0, 1, 0);
      }
      else
      {
          double m21 = mat(2, 1) - mat(1, 2);
          double m02 = mat(0, 2) - mat(2, 0);
          double m10 = mat(1, 0) - mat(0, 1);
          double denom = std::sqrt(m21 * m21 + m02 * m02 + m10 * m10);
          out_axis[0] = m21 / denom;
          out_axis[1] = m02 / denom;
          out_axis[2] = m10 / denom;
          out_axis[3] = 0;
      }
  }
  """
  cos = 0.5 * (tf.trace(Rs) - 1)
  cos = tf.clip_by_value(cos, -1, 1)
  theta = tf.acos(cos)
  m21 = Rs[:, 2, 1] - Rs[:, 1, 2]
  m02 = Rs[:, 0, 2] - Rs[:, 2, 0]
  m10 = Rs[:, 1, 0] - Rs[:, 0, 1]
  denom = tf.sqrt(m21 * m21 + m02 * m02 + m10 * m10)
  # Near theta = 0 the denominator vanishes; tf.where falls back to the raw
  # differences, which are then scaled to ~0 by theta below.
  axis0 = tf.where(tf.abs(theta) < 0.00001, m21, m21 / denom)
  axis1 = tf.where(tf.abs(theta) < 0.00001, m02, m02 / denom)
  axis2 = tf.where(tf.abs(theta) < 0.00001, m10, m10 / denom)
  # Return theta * axis, i.e. [B, 3] scaled axis-angle vectors.
  return tf.expand_dims(theta, 1) * tf.stack([axis0, axis1, axis2], 1)
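# Sketch: a 90-degree rotation about z should map to the axis-angle vector
# [0, 0, pi/2].
import tensorflow as tf

rot_z90 = tf.constant([[[0., -1., 0.],
                        [1., 0., 0.],
                        [0., 0., 1.]]], tf.float32)
with tf.Session() as sess:
  print(sess.run(batch_rot2aa(rot_z90)))  # ~[[0., 0., 1.5708]]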
def eval_direction_net_translation(src_img, trt_img, rotation_gt,
                                   translation_gt, fov_gt, rotation_pred,
                                   derotate_both=False):
  """Build the computation graph to evaluate the DirectionNet-T.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    translation_gt: [BATCH, 3] ground truth translation directions.
    fov_gt: [BATCH] the ground truth field of view (degrees) of input images.
    rotation_pred: [BATCH, 3, 3] estimated rotations from DirectionNet-R.
    derotate_both: (bool) transform both input images to a middle frame by half
      the relative rotation between them to cancel out the rotation if true.
      Otherwise, only derotate the target image to the source image's frame.

  Returns:
    TensorFlow metrics.
  """
  net = model.DirectionNet(1)
  (transformed_src, transformed_trt) = util.derotation(
      src_img,
      trt_img,
      rotation_pred,
      fov_gt,
      FLAGS.transformed_fov,
      [FLAGS.transformed_height, FLAGS.transformed_width],
      derotate_both)
  (transformed_src_gt, transformed_trt_gt) = util.derotation(
      src_img,
      trt_img,
      rotation_gt,
      fov_gt,
      FLAGS.transformed_fov,
      [FLAGS.transformed_height, FLAGS.transformed_width],
      derotate_both)
  translation_gt = tf.expand_dims(translation_gt, 1)
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      translation_gt,
      tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(transformed_src, transformed_trt, training=False)
  directions, _, distribution_pred = util.distributions_to_directions(pred)
  half_derotation = util.half_rotation(rotation_pred)
  # The output directions are relative to the derotated frame. Transform them
  # back to the source images' frame.
  directions = tf.matmul(directions, half_derotation, transpose_b=True)
  translation_error = tf.reduce_mean(tf.acos(tf.clip_by_value(
      tf.reduce_sum(directions * translation_gt, -1), -1., 1.)))

  tf.summary.image('distribution/translation/ground_truth',
                   distribution_gt,
                   max_outputs=4)
  tf.summary.image('distribution/translation/prediction',
                   distribution_pred,
                   max_outputs=4)
  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)
  tf.summary.image('transformed_source_image', transformed_src, max_outputs=4)
  tf.summary.image('transformed_target_image', transformed_trt, max_outputs=4)
  tf.summary.image(
      'transformed_source_image_gt', transformed_src_gt, max_outputs=4)
  tf.summary.image(
      'transformed_target_image_gt', transformed_trt_gt, max_outputs=4)

  metrics_to_values, metrics_to_updates = (
      metrics.aggregate_metric_map({
          'translation_error':
              tf.metrics.mean(util.radians_to_degrees(translation_error)),
          'translation_error/median':
              streaming_median_metric(
                  tf.reshape(util.radians_to_degrees(translation_error), (1,)))
      }))
  return metrics_to_values, metrics_to_updates
def testRenames(self):
  self.assertAllClose(1.04719755, tf.acos(0.5))
  self.assertAllClose(0.5, tf.rsqrt(4.0))
def angular_distance(v1, v2):
  """Angle in radians between unit vectors along the last axis."""
  dot = tf.reduce_sum(v1 * v2, -1)
  return tf.acos(tf.clip_by_value(dot, -1., 1.))
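# Sketch: the angle between orthogonal unit vectors is pi/2. Inputs are assumed
# to be unit-normalized; otherwise the dot product is not a cosine.
import tensorflow as tf

v1 = tf.constant([[1., 0., 0.]])
v2 = tf.constant([[0., 1., 0.]])
with tf.Session() as sess:
  print(sess.run(angular_distance(v1, v2)))  # ~[1.5708]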
def tf_quaternion_to_angle(q):
  """Rotation angle of a unit quaternion q = [x, y, z, w] (scalar part last)."""
  return tf.acos(q[3]) * 2.0
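# Sketch: a 90-degree rotation about z corresponds to the unit quaternion
# [0, 0, sin(45 deg), cos(45 deg)], so the recovered angle should be pi/2.
import numpy as np
import tensorflow as tf

q = tf.constant([0., 0., np.sin(np.pi / 4), np.cos(np.pi / 4)], tf.float32)
with tf.Session() as sess:
  print(sess.run(tf_quaternion_to_angle(q)))  # ~1.5708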
def direction_net_rotation(src_img, trt_img, rotation_gt,
                           n_output_distributions=3):
  """Build the computation graph to train the DirectionNet-R.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    n_output_distributions: (int) number of output distributions (either two or
      three). The model uses the 9D representation for rotations when it is 3
      and the 6D representation when it is 2.

  Returns:
    A collection of tensors including training ops, loss, and global step count.

  Raises:
    ValueError: 'n_output_distributions' must be either 2 or 3.
  """
  if n_output_distributions != 3 and n_output_distributions != 2:
    raise ValueError("'n_output_distributions' must be either 2 or 3.")

  net = model.DirectionNet(n_output_distributions)
  global_step = tf.train.get_or_create_global_step()
  directions_gt = rotation_gt[:, :n_output_distributions]
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      directions_gt,
      tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(src_img, trt_img, training=True)
  directions, expectation, distribution_pred = util.distributions_to_directions(
      pred)
  if n_output_distributions == 3:
    rotation_estimated = util.svd_orthogonalize(directions)
  elif n_output_distributions == 2:
    rotation_estimated = util.gram_schmidt(directions)

  direction_loss = losses.direction_loss(directions, directions_gt)
  distribution_loss = tf.constant(
      FLAGS.alpha, tf.float32) * losses.distribution_loss(
          distribution_pred, distribution_gt)
  spread_loss = tf.cast(FLAGS.beta, tf.float32) * losses.spread_loss(
      expectation)
  rotation_error = tf.reduce_mean(
      util.rotation_geodesic(rotation_estimated, rotation_gt))
  direction_error = tf.reduce_mean(
      tf.acos(
          tf.clip_by_value(tf.reduce_sum(directions * directions_gt, -1),
                           -1., 1.)))
  loss = direction_loss + distribution_loss + spread_loss

  tf.summary.scalar('loss', loss)
  tf.summary.scalar('distribution_loss', distribution_loss)
  tf.summary.scalar('spread_loss', spread_loss)
  tf.summary.scalar('direction_error',
                    util.radians_to_degrees(direction_error))
  tf.summary.scalar('rotation_error',
                    util.radians_to_degrees(rotation_error))
  for i in range(n_output_distributions):
    tf.summary.image('distribution/rotation/ground_truth_%d' % (i + 1),
                     distribution_gt[:, :, :, i:i + 1],
                     max_outputs=4)
    tf.summary.image('distribution/rotation/prediction_%d' % (i + 1),
                     distribution_pred[:, :, :, i:i + 1],
                     max_outputs=4)
  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)

  optimizer = tf.train.GradientDescentOptimizer(FLAGS.lr)
  train_op = optimizer.minimize(loss, global_step=global_step, name='train')
  update_op = net.updates
  return Computation(tf.group([train_op, update_op]), loss, global_step)
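# Illustrative sketch (not the repo's util.gram_schmidt): how two predicted
# direction vectors determine a full rotation matrix via Gram-Schmidt, as in
# the 6D rotation representation used when n_output_distributions == 2.
import tensorflow as tf

def gram_schmidt_sketch(directions):
  """directions: [BATCH, 2, 3] two roughly orthogonal direction vectors."""
  x = tf.nn.l2_normalize(directions[:, 0], axis=-1)
  y = directions[:, 1]
  # Remove the component of y along x, then normalize.
  y = tf.nn.l2_normalize(
      y - tf.reduce_sum(x * y, -1, keepdims=True) * x, axis=-1)
  z = tf.cross(x, y)  # third row completes a right-handed orthonormal basis
  return tf.stack([x, y, z], axis=1)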
def direction_net_single(src_img, trt_img, rotation_gt, translation_gt):
  """Build the computation graph to train the DirectionNet-Single.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    translation_gt: [BATCH, 3] ground truth translation directions.

  Returns:
    A collection of tensors including training ops, loss, and global step count.
  """
  net = model.DirectionNet(4)
  global_step = tf.train.get_or_create_global_step()
  # Stack the three rotation rows and the translation direction into a
  # [BATCH, 4, 3] set of ground truth directions.
  directions_gt = tf.concat(
      [rotation_gt, tf.expand_dims(translation_gt, 1)], 1)
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      directions_gt,
      tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(src_img, trt_img, training=True)
  directions, expectation, distribution_pred = util.distributions_to_directions(
      pred)
  rotation_estimated = util.svd_orthogonalize(directions[:, :3])

  direction_loss = losses.direction_loss(directions, directions_gt)
  distribution_loss = tf.constant(
      FLAGS.alpha, tf.float32) * losses.distribution_loss(
          distribution_pred, distribution_gt)
  spread_loss = tf.cast(FLAGS.beta, tf.float32) * losses.spread_loss(
      expectation)
  rotation_error = tf.reduce_mean(
      util.rotation_geodesic(rotation_estimated, rotation_gt))
  translation_error = tf.reduce_mean(
      tf.acos(
          tf.clip_by_value(
              tf.reduce_sum(directions[:, -1] * directions_gt[:, -1], -1),
              -1., 1.)))
  direction_error = tf.reduce_mean(
      tf.acos(
          tf.clip_by_value(tf.reduce_sum(directions * directions_gt, -1),
                           -1., 1.)))
  loss = direction_loss + distribution_loss + spread_loss

  tf.summary.scalar('loss', loss)
  tf.summary.scalar('distribution_loss', distribution_loss)
  tf.summary.scalar('spread_loss', spread_loss)
  tf.summary.scalar('direction_error',
                    util.radians_to_degrees(direction_error))
  tf.summary.scalar('rotation_error',
                    util.radians_to_degrees(rotation_error))
  tf.summary.scalar('translation_error',
                    util.radians_to_degrees(translation_error))
  for i in range(3):
    tf.summary.image('distribution/rotation/ground_truth_%d' % (i + 1),
                     distribution_gt[:, :, :, i:i + 1],
                     max_outputs=4)
    tf.summary.image('distribution/rotation/prediction_%d' % (i + 1),
                     distribution_pred[:, :, :, i:i + 1],
                     max_outputs=4)
  tf.summary.image('distribution/translation/ground_truth',
                   distribution_gt[:, :, :, -1:],
                   max_outputs=4)
  tf.summary.image('distribution/translation/prediction',
                   distribution_pred[:, :, :, -1:],
                   max_outputs=4)
  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)

  optimizer = tf.train.GradientDescentOptimizer(FLAGS.lr)
  train_op = optimizer.minimize(loss, global_step=global_step, name='train')
  update_op = net.updates
  return Computation(tf.group([train_op, update_op]), loss, global_step)
def direction_net_translation(src_img, trt_img, rotation_gt, translation_gt,
                              fov_gt, rotation_pred, derotate_both=False):
  """Build the computation graph to train the DirectionNet-T.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    translation_gt: [BATCH, 3] ground truth translation directions.
    fov_gt: [BATCH] the ground truth field of view (degrees) of input images.
    rotation_pred: [BATCH, 3, 3] estimated rotations from DirectionNet-R.
    derotate_both: (bool) transform both input images to a middle frame by half
      the relative rotation between them to cancel out the rotation if true.
      Otherwise, only derotate the target image to the source image's frame.

  Returns:
    A collection of tensors including training ops, loss, and global step count.
  """
  net = model.DirectionNet(1)
  global_step = tf.train.get_or_create_global_step()
  # Half the time derotate by a randomly perturbed ground truth rotation;
  # otherwise use the rotation predicted by DirectionNet-R.
  perturbed_rotation = tf.cond(
      tf.less(tf.random_uniform([], 0, 1.0), 0.5),
      lambda: util.perturb_rotation(rotation_gt, [10., 5., 10.]),
      lambda: rotation_pred)

  (transformed_src, transformed_trt) = util.derotation(
      src_img,
      trt_img,
      perturbed_rotation,
      fov_gt,
      FLAGS.transformed_fov,
      [FLAGS.transformed_height, FLAGS.transformed_width],
      derotate_both)
  (transformed_src_gt, transformed_trt_gt) = util.derotation(
      src_img,
      trt_img,
      rotation_gt,
      fov_gt,
      FLAGS.transformed_fov,
      [FLAGS.transformed_height, FLAGS.transformed_width],
      derotate_both)

  # Express the ground truth translation in the derotated (middle) frame.
  half_derotation = util.half_rotation(perturbed_rotation)
  translation_gt = tf.squeeze(tf.matmul(
      half_derotation, tf.expand_dims(translation_gt, -1),
      transpose_a=True), -1)
  translation_gt = tf.expand_dims(translation_gt, 1)
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      translation_gt,
      tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(transformed_src, transformed_trt, training=True)
  directions, expectation, distribution_pred = util.distributions_to_directions(
      pred)

  direction_loss = losses.direction_loss(directions, translation_gt)
  distribution_loss = tf.constant(
      FLAGS.alpha, tf.float32) * losses.distribution_loss(
          distribution_pred, distribution_gt)
  spread_loss = tf.cast(FLAGS.beta, tf.float32) * losses.spread_loss(
      expectation)
  direction_error = tf.reduce_mean(
      tf.acos(
          tf.clip_by_value(tf.reduce_sum(directions * translation_gt, -1),
                           -1., 1.)))
  loss = direction_loss + distribution_loss + spread_loss

  tf.summary.scalar('loss', loss)
  tf.summary.scalar('distribution_loss', distribution_loss)
  tf.summary.scalar('spread_loss', spread_loss)
  tf.summary.scalar('direction_error',
                    util.radians_to_degrees(direction_error))
  tf.summary.image('distribution/translation/ground_truth',
                   distribution_gt,
                   max_outputs=4)
  tf.summary.image('distribution/translation/prediction',
                   distribution_pred,
                   max_outputs=4)
  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)
  tf.summary.image('transformed_source_image', transformed_src, max_outputs=4)
  tf.summary.image('transformed_target_image', transformed_trt, max_outputs=4)
  tf.summary.image(
      'transformed_source_image_gt', transformed_src_gt, max_outputs=4)
  tf.summary.image(
      'transformed_target_image_gt', transformed_trt_gt, max_outputs=4)

  optimizer = tf.train.GradientDescentOptimizer(FLAGS.lr)
  train_op = optimizer.minimize(loss, global_step=global_step, name='train')
  update_op = net.updates
  return Computation(tf.group([train_op, update_op]), loss, global_step)
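# Illustrative sketch (not the repo's util.half_rotation): a rotation by half
# the angle about the same axis, recovered from the axis-angle form and rebuilt
# with Rodrigues' formula R = I + sin(t) K + (1 - cos(t)) K^2. Reuses
# batch_rot2aa from earlier in this section.
import tensorflow as tf

def half_rotation_sketch(rotation):
  """rotation: [BATCH, 3, 3] rotation matrices; returns half rotations."""
  aa = batch_rot2aa(rotation) / 2.0  # [BATCH, 3] axis-angle at half the angle
  theta = tf.norm(aa, axis=-1, keepdims=True)
  axis = aa / tf.maximum(theta, 1e-8)
  x, y, z = axis[:, 0], axis[:, 1], axis[:, 2]
  zero = tf.zeros_like(x)
  # Skew-symmetric cross-product matrix K of the rotation axis.
  k = tf.reshape(
      tf.stack([zero, -z, y, z, zero, -x, -y, x, zero], -1), [-1, 3, 3])
  theta = theta[:, :, tf.newaxis]
  eye = tf.eye(3, batch_shape=tf.shape(rotation)[:1])
  return eye + tf.sin(theta) * k + (1 - tf.cos(theta)) * tf.matmul(k, k)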