Example #1
    def predict_depth(self, rgb, sensor_depth=None):
        del sensor_depth  # unused
        with tf.variable_scope('depth_prediction', reuse=tf.AUTO_REUSE):
            if self._mode == tf.estimator.ModeKeys.TRAIN:
                noise_stddev = 0.5
                global_step = tf.train.get_global_step()
                rampup_steps = self._params['layer_norm_noise_rampup_steps']
                if global_step is not None and rampup_steps > 0:
                    # Ramp the noise stddev up quadratically from 0 to its
                    # full value over the first rampup_steps steps.
                    noise_stddev *= tf.square(
                        tf.minimum(
                            tf.to_float(global_step) / float(rampup_steps),
                            1.0))
            else:
                noise_stddev = 0.0

            def _normalizer_fn(x, is_train, name='bn'):
                return randomized_layer_norm(x,
                                             is_train=is_train,
                                             name=name,
                                             stddev=noise_stddev)

            if self._params['learn_scale']:
                depth_scale = tf.get_variable('depth_scale', initializer=1.0)
                maybe_summary.scalar('depth_scale', depth_scale)
            else:
                depth_scale = 1.0

            return depth_scale * depth_prediction_resnet18unet(
                2 * rgb - 1.0,
                self._mode == tf.estimator.ModeKeys.TRAIN,
                self._params['weight_decay'],
                _normalizer_fn,
                reflect_padding=self._params['reflect_padding'])
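
predict_depth above relies on a randomized_layer_norm helper that is not
shown in this listing. Below is a minimal sketch of what such a function
could look like, assuming TF1-style layer normalization whose per-image
statistics are perturbed by multiplicative Gaussian noise at training time
(controlled by stddev); the project's actual implementation may differ in
details.

def randomized_layer_norm(x, is_train, name='bn', stddev=0.5):
    """Layer norm with noisy statistics during training (a sketch)."""
    with tf.variable_scope(name):
        params_shape = x.shape.as_list()[-1:]
        beta = tf.get_variable(
            'beta', shape=params_shape, initializer=tf.zeros_initializer())
        gamma = tf.get_variable(
            'gamma', shape=params_shape, initializer=tf.ones_initializer())
        # Normalize over the spatial dimensions, per sample and per channel.
        mean, variance = tf.nn.moments(x, [1, 2], keep_dims=True)
        if is_train:
            # Multiplicative noise on the statistics; stddev may be a Python
            # float or a scalar tensor (e.g. the ramped-up value above).
            mean *= 1.0 + tf.random.truncated_normal(
                tf.shape(mean), stddev=stddev)
            variance *= 1.0 + tf.random.truncated_normal(
                tf.shape(variance), stddev=stddev)
        return tf.nn.batch_normalization(
            x, mean, variance, offset=beta, scale=gamma,
            variance_epsilon=1e-3)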
Example #2
def add_intrinsics_head(bottleneck, image_height, image_width):
  """Adds a head the preficts camera intrinsics.

  Args:
    bottleneck: A tf.Tensor of shape [B, 1, 1, C], typically the bottlenech
      features of a netrowk.
    image_height: A scalar tf.Tensor or an python scalar, the image height in
      pixels.
    image_width: A scalar tf.Tensor or an python scalar, the image width in
      pixels.

  image_height and image_width are used to provide the right scale for the focal
  length and the offest parameters.

  Returns:
    a tf.Tensor of shape [B, 3, 3], and type float32, where the 3x3 part is the
    intrinsic matrix: (fx, 0, x0), (0, fy, y0), (0, 0, 1).
  """
  with tf.variable_scope('CameraIntrinsics'):
    # Since the focal lengths in pixels tend to be on the order of the image
    # width and height, we multiply the network prediction by them.
    focal_lengths = tf.squeeze(
        layers.conv2d(
            bottleneck,
            2, [1, 1],
            stride=1,
            activation_fn=tf.nn.softplus,
            weights_regularizer=None,
            scope='foci'),
        axis=(1, 2)) * tf.to_float(
            tf.convert_to_tensor([[image_width, image_height]]))

    # The pixel offsets tend to be around the center of the image, and they
    # are typically a fraction of the image width and height in pixels. We
    # thus multiply the network prediction by the width and height, and the
    # additional 0.5 places them by default at the center of the image.
    offsets = (tf.squeeze(
        layers.conv2d(
            bottleneck,
            2, [1, 1],
            stride=1,
            activation_fn=None,
            weights_regularizer=None,
            biases_initializer=None,
            scope='offsets'),
        axis=(1, 2)) + 0.5) * tf.to_float(
            tf.convert_to_tensor([[image_width, image_height]]))

    foci = tf.linalg.diag(focal_lengths)

    maybe_summary.scalar('foci', tf.reduce_mean(foci))
    maybe_summary.scalar('offsets', tf.reduce_mean(offsets))

    intrinsic_mat = tf.concat([foci, tf.expand_dims(offsets, -1)], axis=2)
    batch_size = tf.shape(bottleneck)[0]
    last_row = tf.tile([[[0.0, 0.0, 1.0]]], [batch_size, 1, 1])
    intrinsic_mat = tf.concat([intrinsic_mat, last_row], axis=1)
    return intrinsic_mat
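
For reference, `layers` above is assumed to be tf.contrib.layers (slim),
whose conv2d matches the arguments used. A quick usage sketch with
illustrative shapes: a [B, 1, 1, C] bottleneck yields one 3x3 intrinsic
matrix per batch element.

bottleneck = tf.random.normal([4, 1, 1, 256])  # hypothetical bottleneck
intrinsics = add_intrinsics_head(bottleneck, image_height=128,
                                 image_width=416)
# intrinsics evaluates to a [4, 3, 3] batch of matrices of the form
# [[fx, 0, x0], [0, fy, y0], [0, 0, 1]].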
Example #3
def _build_estimator_spec(losses, trainer_params, mode, use_tpu=False):
    """Builds an EstimatorSpec/TPUEstimatorSpec based on trainer_params.

  Args:
    losses: A dictionary of {string: tf.Tensor} containing the various losses.
      The keys will be used as display names for the summaries, the values will
      be summed up to obtain the total loss, which is to be minimized.
    trainer_params: A ParameterContainer object with parameters relevant to the
      training.
    mode: One of tf.estimator.ModeKeys: TRAIN, PREDICT or EVAL.
    use_tpu: A boolean, if True, a TPU-compatible version of EstimatorSpec will
      be built.

  Returns:
    A EstimatorSpec or a TPUEstimatorSpec object.
  """
    if mode == tf.estimator.ModeKeys.TRAIN:
        total_loss = 0.0
        for loss_name, loss in six.iteritems(losses):
            if not use_tpu:
                tf.summary.scalar('Loss/%s' % loss_name, loss)
            total_loss += loss

        learning_rate = trainer_params.learning_rate
        maybe_summary.scalar('Learning Rate', learning_rate)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9)
        optimizer = contrib_estimator.clip_gradients_by_norm(
            optimizer, trainer_params.clip_gradients)

        if use_tpu:
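            # Average gradients across TPU shards so every shard applies the
            # same update.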
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        train_op = optimizer.minimize(total_loss,
                                      global_step=tf.train.get_global_step())
    else:
        total_loss = None
        train_op = None

    if use_tpu:
        estimator_spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op)
    else:
        estimator_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op)

    return estimator_spec
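
A sketch of how _build_estimator_spec could be wired into an Estimator
model_fn. The loss names, the network function and the trainer_params
plumbing below are illustrative assumptions, not the project's actual
interface.

def model_fn(features, labels, mode, params):
    del labels  # this sketch computes its losses from `features` alone
    predictions = build_network(features['rgb'])  # hypothetical network
    losses = {
        'depth_smoothness': smoothness_loss(predictions),  # hypothetical
        'regularization': tf.losses.get_regularization_loss(),
    }
    return _build_estimator_spec(
        losses, params.trainer, mode, use_tpu=False)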
Example #4
def create_scales(constraint_minimum):
    """Creates variables representing rotation and translation scaling factors.

    Args:
      constraint_minimum: A scalar, the variables will be constrained to not fall
        below it.

    Returns:
      Two scalar variables, rotation and translation scale.
    """
    def constraint(x):
        return tf.nn.relu(x - constraint_minimum) + constraint_minimum

    with tf.variable_scope('Scales', initializer=0.01, constraint=constraint):
        rot_scale = tf.get_variable('rotation')
        trans_scale = tf.get_variable('translation')
        maybe_summary.scalar('rotation', rot_scale)
        maybe_summary.scalar('translation', trans_scale)

    return rot_scale, trans_scale
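
Note that the constraint above is just max(x, constraint_minimum) written
with a ReLU: relu(x - m) + m equals x for x >= m and m otherwise, so the
optimizer can never push either scale below the minimum. A short usage
sketch:

rot_scale, trans_scale = create_scales(constraint_minimum=0.001)
# Both scalars start at the scope-level initializer 0.01; after every
# update they are clipped back to >= 0.001 by the constraint function.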
Example #5
def create_and_fetch_intrinsics_per_video_index(video_index,
                                                height,
                                                width,
                                                max_video_index=1000,
                                                num_summaries=10):
    """Fetches the intrinsic mcatrix of a batch of video index.

  Args:
    video_index: A batch of scalars (int32-s) representing video indices, must
      be in [0, max_video_index).
    height: Image height in pixels.
    width: Image width in pixels.
    max_video_index: Maximum video_index (video_index < max_video_index).
    num_summaries: Number of video_indices for which intrinsics will be
      displayed on TensorBoard.

  Returns:
    A batch of intrinsics matrices (shape: [B, 3, 3], where B is the length of
    `video_index`
  """
    intrin_initializer = tf.tile([[1.0, 1.0, 0.5, 0.5]], [max_video_index, 1])
    intrin_factors = tf.compat.v1.get_variable('all_intrin',
                                               initializer=intrin_initializer)

    batch_factors = tf.gather(intrin_factors, video_index)
    fx, fy, x0, y0 = _get_intrinsics_from_coefficients(batch_factors, height,
                                                       width)
    zero = tf.zeros_like(fx)
    one = tf.ones_like(fx)
    int_mat = [[fx, zero, x0], [zero, fy, y0], [zero, zero, one]]
    int_mat = tf.transpose(int_mat, [2, 0, 1])

    if num_summaries > 0:
        fx, fy, x0, y0 = _get_intrinsics_from_coefficients(
            intrin_factors, height, width)
        for i in range(num_summaries):
            maybe_summary.scalar('intrinsics/0%d/fx' % i, fx[i])
            maybe_summary.scalar('intrinsics/0%d/fy' % i, fy[i])
            maybe_summary.scalar('intrinsics/0%d/x0' % i, x0[i])
            maybe_summary.scalar('intrinsics/0%d/y0' % i, y0[i])

    maybe_summary.histogram('intrinsics/fx', fx)
    maybe_summary.histogram('intrinsics/fy', fy)
    maybe_summary.histogram('intrinsics/x0', x0)
    maybe_summary.histogram('intrinsics/y0', y0)

    return int_mat
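
The helper _get_intrinsics_from_coefficients is not shown in this listing.
A plausible sketch follows, assuming (consistently with add_intrinsics_head
above and with the [1.0, 1.0, 0.5, 0.5] initializer) that the four learned
coefficients are the focal lengths and principal point expressed as
fractions of the image dimensions; the actual scaling convention may differ.

def _get_intrinsics_from_coefficients(coefficients, height, width):
    """Converts per-video coefficients to fx, fy, x0, y0 (a sketch)."""
    fx_factor, fy_factor, x0_factor, y0_factor = tf.unstack(
        coefficients, axis=1)
    fx = fx_factor * tf.to_float(width)   # assumption: fx scales with width
    fy = fy_factor * tf.to_float(height)  # assumption: fy scales with height
    x0 = x0_factor * tf.to_float(width)   # principal point defaults to the
    y0 = y0_factor * tf.to_float(height)  # image center (factors init 0.5)
    return fx, fy, x0, y0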