def single_batch_a_func(
    self, features, scope, mode, context_fn, reuse, config, params):
  """Single step action predictor when there is a single batch dim."""
  del config
  with tf.variable_scope(scope, reuse=reuse, use_resource=True):
    with tf.variable_scope('state_features', reuse=reuse, use_resource=True):
      feature_points, end_points = vision_layers.BuildImagesToFeaturesModel(
          features.image,
          is_training=(mode == TRAIN),
          normalizer_fn=tf.contrib.layers.layer_norm)
    if context_fn:
      feature_points = context_fn(feature_points)
    if params and params.get('is_inner_loop', False):
      if self._predict_con_gripper_pose:
        gripper_pose = self._predict_gripper_pose(feature_points)
      else:
        gripper_pose = tf.zeros_like(features.gripper_pose)
    else:
      gripper_pose = features.gripper_pose
    action, _ = vision_layers.BuildImageFeaturesToPoseModel(
        feature_points, aux_input=gripper_pose, num_outputs=self._action_size)
    action = self._output_mean + self._output_stddev * action
  return {
      'action': action,
      'image': features.image,
      'feature_points': feature_points,
      'softmax': end_points['softmax'],
  }
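
# Illustrative sketch (not part of the original code): the portion of `params`
# that single_batch_a_func above actually inspects. Only 'is_inner_loop' is
# read; the other key and all values here are placeholders.
_EXAMPLE_PARAMS = {
    # When True, the gripper pose fed to the pose model is predicted from the
    # image features (or zeroed out) instead of taken from the input features.
    'is_inner_loop': True,
    # Reserved TPUEstimator key; unused by single_batch_a_func itself.
    'batch_size': 32,
}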
def a_func(
    self,
    features,
    scope,
    mode,
    config=None,
    params=None,
    reuse=tf.AUTO_REUSE,
    context_fn=None,
):
  """A (state) regression function.

  This function can return a stochastic or a deterministic tensor.

  Args:
    features: This is the first item returned from the input_fn and parsed by
      tensorspec_utils.validate_and_pack. A spec_structure which fulfills the
      requirements of self.get_feature_specification.
    scope: String specifying variable scope.
    mode: (ModeKeys) Specifies if this is training, evaluation or prediction.
    config: Optional configuration object. Will receive what is passed to
      Estimator in config parameter, or the default config. Allows updating
      things in your model_fn based on configuration such as num_ps_replicas,
      or model_dir.
    params: An optional dict of hyper parameters that will be passed into
      input_fn and model_fn. Keys are names of parameters, values are basic
      python types. There are reserved keys for TPUEstimator, including
      'batch_size'.
    reuse: Whether or not to reuse variables under variable scope 'scope'.
    context_fn: Optional python function that takes in features and returns
      new features of same shape. For merging information like in RL^2.

  Returns:
    outputs: A {key: Tensor} mapping with keys 'inference_output' (the
      estimated pose) and 'state_features'.
  """
  del config
  is_training = mode == TRAIN
  image = tf.image.convert_image_dtype(features.state, tf.float32)
  with tf.variable_scope(scope, reuse=reuse, use_resource=True):
    with tf.variable_scope('state_features', reuse=reuse, use_resource=True):
      feature_points, end_points = vision_layers.BuildImagesToFeaturesModel(
          image, is_training=is_training, normalizer_fn=layers.layer_norm)
    del end_points
    if context_fn:
      feature_points = context_fn(feature_points)
    estimated_pose, _ = vision_layers.BuildImageFeaturesToPoseModel(
        feature_points, num_outputs=self._action_size)
  return {
      'inference_output': estimated_pose,
      'state_features': feature_points
  }
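
# Illustrative sketch (not part of the original code): a minimal context_fn
# matching the contract documented above -- it receives the feature tensor and
# must return a tensor of the same shape. The variable name and the additive
# form are placeholders, not the mechanism used by the original models.
def example_context_fn(feature_points):
  context_bias = tf.get_variable(
      'example_context_bias',
      shape=feature_points.shape[-1:],
      dtype=tf.float32,
      initializer=tf.zeros_initializer())
  # Broadcasts over the batch dimension, so the output shape matches the input.
  return feature_points + context_bias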
def _single_batch_a_func(self,
                         features,
                         scope,
                         mode,
                         context_fn=None,
                         reuse=tf.AUTO_REUSE):
  """A state -> action regression function that expects a single batch dim."""
  gripper_pose = features.gripper_pose if self._use_gripper_input else None
  with tf.variable_scope(scope, reuse=reuse, use_resource=True):
    with tf.variable_scope('state_features', reuse=reuse, use_resource=True):
      feature_points, end_points = vision_layers.BuildImagesToFeaturesModel(
          features.image,
          is_training=(mode == TRAIN),
          normalizer_fn=tf.contrib.layers.layer_norm)
    if context_fn:
      feature_points = context_fn(feature_points)
    if gripper_pose is None:
      fc_input = feature_points
    else:
      fc_input = tf.concat([feature_points, gripper_pose], -1)
    outputs = {}
    if self._num_mixture_components > 1:
      dist_params, _ = mdn.predict_mdn_params(
          fc_input,
          self._num_mixture_components,
          self._action_size,
          condition_sigmas=self._condition_mixture_stddev)
      gm = mdn.get_mixture_distribution(
          dist_params, self._num_mixture_components, self._action_size,
          self._output_mean if self._normalize_outputs else None)
      if self._output_mixture_sample:
        # Output a mixture sample as action.
        action = gm.sample()
      else:
        action = mdn.gaussian_mixture_approximate_mode(gm)
      outputs['dist_params'] = dist_params
    else:
      action, _ = vision_layers.BuildImageFeaturesToPoseModel(
          fc_input, num_outputs=self._action_size)
      action = self._output_mean + self._output_stddev * action
    outputs.update({
        'action': action,
        'image': features.image,
        'feature_points': feature_points,
        'softmax': end_points['softmax']
    })
  return outputs
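
# Illustrative sketch (not part of the original class): one way the
# 'dist_params' entry returned above could be turned into a training loss.
# The function and argument names are placeholders; it assumes the `mdn`
# module used above and a ground-truth action tensor of shape
# [batch, action_size]. Passing None as the last argument mirrors the
# unnormalized-output case in _single_batch_a_func.
def example_mdn_nll_loss(dist_params, target_action, num_mixture_components,
                         action_size):
  gm = mdn.get_mixture_distribution(
      dist_params, num_mixture_components, action_size, None)
  # Negative log-likelihood of the ground-truth action under the mixture.
  return -tf.reduce_mean(gm.log_prob(target_action))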
def predict_mdn_params(inputs,
                       num_alphas,
                       sample_size,
                       condition_sigmas=False,
                       aux_output_dim=0):
  """Outputs parameters of a mixture density network given inputs.

  Args:
    inputs: A tensor input to compute the MDN parameters from.
    num_alphas: The number of mixture components.
    sample_size: Scalar, the size of a single distribution sample.
    condition_sigmas: If True, the sigma params are conditioned on `inputs`.
      Otherwise they are simply learned variables.
    aux_output_dim: Dimensionality of any auxiliary outputs.

  Returns:
    dist_params: A tensor of shape
      [..., num_alphas + 2 * num_alphas * sample_size].
    aux_output: Auxiliary output of shape [..., aux_output_dim] if
      aux_output_dim is > 0.
  """
  num_mus = num_alphas * sample_size
  # Assume isotropic gaussian components.
  num_sigmas = num_alphas * sample_size
  num_fc_outputs = num_alphas + num_mus
  if condition_sigmas:
    num_fc_outputs = num_fc_outputs + num_sigmas
  dist_params, aux_output = vision_layers.BuildImageFeaturesToPoseModel(
      inputs, num_outputs=num_fc_outputs, aux_output_dim=aux_output_dim)
  if not condition_sigmas:
    # Sigmas initialized to log(e - 1) so that
    # softplus(sigmas) = log(1 + (e - 1)) = 1.
    sigmas = tf.get_variable(
        'mdn_stddev_inputs',
        shape=[num_sigmas],
        dtype=tf.float32,
        initializer=tf.constant_initializer(np.log(np.e - 1)))
    tiled_sigmas = tf.tile(sigmas[None],
                           tf.stack([tf.shape(dist_params)[0], 1]))
    dist_params = tf.concat([dist_params, tiled_sigmas], axis=-1)
  return dist_params, aux_output
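
# Illustrative sketch (not part of the original module): calling
# predict_mdn_params and consuming its output. The sizes and the zero-filled
# feature tensor are placeholder values, and it assumes that the
# get_mixture_distribution and gaussian_mixture_approximate_mode helpers
# referenced by callers of this module are defined alongside this function.
def _example_predict_mdn_params_usage():
  num_alphas, sample_size = 3, 7
  features = tf.zeros([8, 64])  # Placeholder [batch, feature_dim] input.
  dist_params, _ = predict_mdn_params(
      features, num_alphas, sample_size, condition_sigmas=False)
  # dist_params has last dimension num_alphas + 2 * num_alphas * sample_size,
  # i.e. 3 + 2 * 3 * 7 = 45.
  gm = get_mixture_distribution(dist_params, num_alphas, sample_size, None)
  # A stochastic action (mixture sample) and a deterministic approximate mode.
  return gm.sample(), gaussian_mixture_approximate_mode(gm)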