Example #1
  def single_batch_a_func(
      self,
      features,
      scope,
      mode,
      context_fn=None,
      reuse=tf.AUTO_REUSE,
      config=None,
      params=None):
    """Single step action predictor when there is a single batch dim."""
    del config
    with tf.variable_scope(scope, reuse=reuse, use_resource=True):
      with tf.variable_scope('state_features', reuse=reuse, use_resource=True):
        feature_points, end_points = vision_layers.BuildImagesToFeaturesModel(
            features.image,
            is_training=(mode == TRAIN),
            normalizer_fn=tf.contrib.layers.layer_norm)

      if context_fn:
        feature_points = context_fn(feature_points)

      # In the inner loop, either predict the gripper pose from the visual
      # features or fall back to zeros; outside it, use the observed pose.
      if params and params.get('is_inner_loop', False):
        if self._predict_con_gripper_pose:
          gripper_pose = self._predict_gripper_pose(feature_points)
        else:
          gripper_pose = tf.zeros_like(features.gripper_pose)
      else:
        gripper_pose = features.gripper_pose

      action, _ = vision_layers.BuildImageFeaturesToPoseModel(
          feature_points, aux_input=gripper_pose, num_outputs=self._action_size)
      action = self._output_mean + self._output_stddev * action
    return {
        'action': action,
        'image': features.image,
        'feature_points': feature_points,
        'softmax': end_points['softmax'],
    }
Example #2
def embed_condition_images(
    condition_image,
    scope,
    reuse=tf.AUTO_REUSE,
    fc_layers=None):
  """Independently embed a (meta)-batch of images.

  Args:
    condition_image: A rank 4 tensor of images: [N, H, W, C].
    scope: Name of the tf variable_scope.
    reuse: The variable_scope reuse setting.
    fc_layers: An optional tuple of ints describing the number of units in each
      fully-connected hidden layer.

  Returns:
    A rank 2 tensor of embeddings: [N, embedding size].

  Raises:
    ValueError if `condition_image` has incorrect rank.
  """
  if len(condition_image.shape) != 4:
    raise ValueError(
        'Image has unexpected shape {}.'.format(condition_image.shape))
  with tf.variable_scope(scope, reuse=reuse, use_resource=True):
    image_embedding, _ = vision_layers.BuildImagesToFeaturesModel(
        condition_image)
    if fc_layers is not None:
      image_embedding = layers.stack(
          image_embedding,
          layers.fully_connected,
          fc_layers[:-1],
          activation_fn=tf.nn.relu,
          normalizer_fn=layers.layer_norm)
      image_embedding = layers.fully_connected(
          image_embedding, fc_layers[-1], activation_fn=None)
  return image_embedding
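
A minimal usage sketch for the function above; the batch size, image size, and fc_layers values are illustrative assumptions, and the snippet presumes the TF1 graph mode and the vision_layers dependency used throughout these examples.

import tensorflow as tf  # TF 1.x, as assumed by the examples above.

# Hypothetical placeholder batch of condition images: [N, H, W, C].
condition_images = tf.placeholder(tf.float32, shape=[8, 64, 64, 3])
condition_embedding = embed_condition_images(
    condition_images,
    scope='embed_condition_images',
    fc_layers=(100, 32))
# condition_embedding is a rank 2 tensor of shape [8, 32].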
Example #3
    def a_func(
        self,
        features,
        scope,
        mode,
        config=None,
        params=None,
        reuse=tf.AUTO_REUSE,
        context_fn=None,
    ):
        """A (state) regression function.

        This function can return a stochastic or a deterministic tensor.

        Args:
          features: This is the first item returned from the input_fn and
            parsed by tensorspec_utils.validate_and_pack. A spec_structure
            which fulfills the requirements of self.get_feature_specification.
          scope: String specifying variable scope.
          mode: (ModeKeys) Specifies if this is training, evaluation or
            prediction.
          config: Optional configuration object. Will receive what is passed
            to Estimator in config parameter, or the default config. Allows
            updating things in your model_fn based on configuration such as
            num_ps_replicas, or model_dir.
          params: An optional dict of hyper parameters that will be passed
            into input_fn and model_fn. Keys are names of parameters, values
            are basic python types. There are reserved keys for TPUEstimator,
            including 'batch_size'.
          reuse: Whether or not to reuse variables under variable scope
            'scope'.
          context_fn: Optional python function that takes in features and
            returns new features of same shape. For merging information like
            in RL^2.

        Returns:
          outputs: A {key: Tensor} mapping. The key 'action' is required.
        """
        del config
        is_training = mode == TRAIN
        image = tf.image.convert_image_dtype(features.state, tf.float32)
        with tf.variable_scope(scope, reuse=reuse, use_resource=True):
            with tf.variable_scope('state_features',
                                   reuse=reuse,
                                   use_resource=True):
                feature_points, end_points = vision_layers.BuildImagesToFeaturesModel(
                    image,
                    is_training=is_training,
                    normalizer_fn=layers.layer_norm)
            del end_points
            if context_fn:
                feature_points = context_fn(feature_points)
            estimated_pose, _ = vision_layers.BuildImageFeaturesToPoseModel(
                feature_points, num_outputs=self._action_size)
        return {
            'inference_output': estimated_pose,
            'state_features': feature_points
        }
Example #4
def embed_condition_images(condition_image,
                           scope,
                           reuse=tf.AUTO_REUSE,
                           fc_layers=None,
                           use_spatial_softmax=True):
    """Independently embed a (meta)-batch of images.

    Args:
      condition_image: A rank 4 tensor of images: [N, H, W, C].
      scope: Name of the tf variable_scope.
      reuse: The variable_scope reuse setting.
      fc_layers: An optional tuple of ints describing the number of units in
        each fully-connected hidden layer, or 1x1 conv layer when excluding
        spatial softmax.
      use_spatial_softmax: Whether to use a spatial softmax or not.

    Returns:
      A rank 2 tensor of embeddings: [N, embedding size] if
      `use_spatial_softmax` is True. Otherwise, a rank 4 tensor of visual
      features: [N, H, W, embedding size].

    Raises:
      ValueError if `condition_image` has incorrect rank.
    """
    if len(condition_image.shape) != 4:
        raise ValueError('Image has unexpected shape {}.'.format(
            condition_image.shape))
    with tf.variable_scope(scope, reuse=reuse, use_resource=True):
        image_embedding, _ = vision_layers.BuildImagesToFeaturesModel(
            condition_image, use_spatial_softmax=use_spatial_softmax)
        if fc_layers is not None:
            if len(image_embedding.shape) == 2:
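                # Spatial softmax produced a flat [N, D] embedding, so stack
                # fully-connected layers on top.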
                image_embedding = layers.stack(image_embedding,
                                               layers.fully_connected,
                                               fc_layers[:-1],
                                               activation_fn=tf.nn.relu,
                                               normalizer_fn=layers.layer_norm)
                image_embedding = layers.fully_connected(image_embedding,
                                                         fc_layers[-1],
                                                         activation_fn=None)
            else:
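                # Without spatial softmax the embedding is still a spatial
                # [N, H, W, C] feature map, so use 1x1 convolutions instead.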
                image_embedding = layers.stack(image_embedding,
                                               layers.conv2d,
                                               fc_layers[:-1],
                                               kernel_size=[1, 1],
                                               activation_fn=tf.nn.relu,
                                               normalizer_fn=layers.layer_norm)
                image_embedding = layers.conv2d(image_embedding,
                                                fc_layers[-1],
                                                kernel_size=[1, 1],
                                                activation_fn=None)
    return image_embedding
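
A minimal usage sketch for the variant above, exercising the use_spatial_softmax=False path in which fc_layers act as 1x1 convolutions; the shapes and layer sizes are illustrative assumptions, under the same TF1 setup as the other examples.

import tensorflow as tf  # TF 1.x, as assumed by the examples above.

# Hypothetical placeholder batch of condition images: [N, H, W, C].
condition_images = tf.placeholder(tf.float32, shape=[8, 64, 64, 3])
spatial_embedding = embed_condition_images(
    condition_images,
    scope='embed_condition_images',
    fc_layers=(64, 16),
    use_spatial_softmax=False)
# spatial_embedding is a rank 4 feature map [8, H', W', 16], where H' and W'
# depend on the strides inside vision_layers.BuildImagesToFeaturesModel.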
Example #5
    def _single_batch_a_func(self,
                             features,
                             scope,
                             mode,
                             context_fn=None,
                             reuse=tf.AUTO_REUSE):
        """A state -> action regression function that expects a single batch dim."""
        gripper_pose = features.gripper_pose if self._use_gripper_input else None
        with tf.variable_scope(scope, reuse=reuse, use_resource=True):
            with tf.variable_scope('state_features',
                                   reuse=reuse,
                                   use_resource=True):
                feature_points, end_points = vision_layers.BuildImagesToFeaturesModel(
                    features.image,
                    is_training=(mode == TRAIN),
                    normalizer_fn=tf.contrib.layers.layer_norm)

            if context_fn:
                feature_points = context_fn(feature_points)

            # gripper_pose is None when gripper input is disabled; only
            # concatenate it when it is actually provided.
            if gripper_pose is not None:
                fc_input = tf.concat([feature_points, gripper_pose], -1)
            else:
                fc_input = feature_points
            outputs = {}
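            # With more than one mixture component, model the action with a
            # mixture density network (MDN); otherwise regress it directly.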
            if self._num_mixture_components > 1:
                dist_params = mdn.predict_mdn_params(
                    fc_input,
                    self._num_mixture_components,
                    self._action_size,
                    condition_sigmas=self._condition_mixture_stddev)
                gm = mdn.get_mixture_distribution(
                    dist_params, self._num_mixture_components,
                    self._action_size,
                    self._output_mean if self._normalize_outputs else None)
                if self._output_mixture_sample:
                    # Output a mixture sample as action.
                    action = gm.sample()
                else:
                    action = mdn.gaussian_mixture_approximate_mode(gm)
                outputs['dist_params'] = dist_params
            else:
                action, _ = vision_layers.BuildImageFeaturesToPoseModel(
                    fc_input, num_outputs=self._action_size)
                action = self._output_mean + self._output_stddev * action
        outputs.update({
            'action': action,
            'image': features.image,
            'feature_points': feature_points,
            'softmax': end_points['softmax']
        })
        return outputs