def _build_actions(self):
    """Build the action tensor, optionally perturbed by exploration.

    The exploration value is obtained from `self._build_exploration()`.
    If it is `None`, or if the model is in inference mode, the greedy
    output is returned as is.

    Returns:
        * continuous case: `self._train_results.q`, or `q + exploration`
          when exploring.
        * discrete case: the argmax of `q` over axis 1 (also stored on
          `self._index_action`), or, when exploring, a `tf.cond` choosing
          between uniformly drawn random action indices and the argmax.
    """
    q_values = self._train_results.q
    n_rows = get_tensor_batch_size(q_values)
    noise = self._build_exploration()

    if self.is_continuous:
        if noise is None or Modes.is_infer(self.mode):
            return q_values
        # Exploring: shift the outputs by the exploration value.
        return q_values + noise

    # Discrete actions: the greedy choice is the index of the largest q.
    greedy_index = tf.argmax(q_values, axis=1)
    self._index_action = greedy_index
    if noise is None or Modes.is_infer(self.mode):
        return greedy_index

    # Exploring: a single scalar draw decides whether the random actions
    # or the greedy ones are returned.
    random_shape = tf.concat(axis=0, values=[n_rows])
    explore_now = tf.random_uniform(shape=(), minval=0, maxval=1) < noise
    random_choice = tf.random_uniform(
        shape=random_shape, minval=0, maxval=self.num_actions, dtype=tf.int64)
    return tf.cond(explore_now,
                   lambda: random_choice,
                   lambda: greedy_index)
def _build(self,  # pylint: disable=arguments-differ
           features, labels, params=None, config=None):
    """Assemble the model's operations and wrap them in an `EstimatorSpec`.

    Steps, in order:
        1. preprocess `features` / `labels` and call the graph function;
        2. outside inference mode, build the loss and the eval metrics,
           plus the train op and summaries when in train mode;
        3. build predictions and extra ops, and track the predictions
           under `tf.GraphKeys.PREDICTIONS`.

    In inference mode `loss`, `train_op` and `eval_metric_ops` stay `None`.
    `params` and `config` are accepted but not used here.
    """
    features, labels = self._preprocess(features, labels)
    results = self._call_graph_fn(features=features, labels=labels)
    build_kwargs = dict(results=results, features=features, labels=labels)

    loss = train_op = eval_metrics = None
    if not Modes.is_infer(self.mode):
        _, loss = self._build_loss(results, features, labels)
        eval_metrics = self._build_eval_metrics(results, features, labels)

        if Modes.is_train(self.mode):
            train_op = self._build_train_op(loss)
            self._build_summary_op(**build_kwargs)

    # Predictions and extra ops are needed in every mode; they are built
    # after the loss/metrics/train ops, as before.
    predictions = self._build_predictions(**build_kwargs)
    extra_ops = self._build_extra_ops(**build_kwargs)

    track(predictions, tf.GraphKeys.PREDICTIONS)

    return EstimatorSpec(
        mode=self.mode,
        predictions=predictions,
        loss=loss,
        extra_ops=extra_ops,
        train_op=train_op,
        eval_metric_ops=eval_metrics,
    )
def _build(self, features, labels=None, params=None, config=None):
    """Assemble the operations of a bridge-backed model.

    The graph function must return a `BridgeSpec`; its `results` field is
    what predictions, eval metrics and summaries are built from.

    Note that the loss and the eval metrics receive `features` in the
    labels position (presumably because the model is scored against its
    own input -- confirm with the bridge implementation).

    `params` and `config` are accepted but not used here.

    Raises:
        ValueError: if the graph function does not return a `BridgeSpec`.
    """
    features, labels = self._preprocess(features, labels)
    spec = self._call_graph_fn(features=features, labels=labels)
    if not isinstance(spec, BridgeSpec):
        raise ValueError('`bridge_fn` should return a BridgeSpec.')

    loss = train_op = eval_metrics = None
    if not Modes.is_infer(self.mode):
        # The whole spec goes to the loss builder; everything else only
        # gets the `results` field.
        _, loss = self._build_loss(spec, features, features)
        eval_metrics = self._build_eval_metrics(spec.results, features, features)

        if Modes.is_train(self.mode):
            train_op = self._build_train_op(loss)
            self._build_summary_op(
                results=spec.results, features=features, labels=labels)

    # Built in every mode, after the loss/metrics/train ops when present.
    predictions = self._build_predictions(
        results=spec.results, features=features, labels=labels)

    track(predictions, tf.GraphKeys.PREDICTIONS)

    return EstimatorSpec(
        mode=self.mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metrics,
    )
def _preprocess(self, features, labels):
    """Run the parent preprocessing, then require `dist_values` in labels.

    Args:
        features: forwarded to the parent class' `_preprocess`.
        labels: forwarded to the parent class' `_preprocess`; outside
            inference mode the value it returns must contain a
            `dist_values` key.

    Returns:
        The `(features, labels)` pair produced by the parent class.

    Raises:
        KeyError: outside inference mode, when `dist_values` is missing
            from the labels.
    """
    features, labels = super(TRPOModel, self)._preprocess(features, labels)

    # Labels are only inspected when the model is not in inference mode.
    if Modes.is_infer(self.mode):
        return features, labels
    if 'dist_values' in labels:
        return features, labels
    raise KeyError("labels must include the keys: `dist_values`.")
def _preprocess(self, features, labels): """Model specific preprocessing. Args: features: `array`, `Tensor` or `dict`. The environment states. if `dict` it must contain a `state` key. labels: `dict`. A dictionary containing `action`, `reward`, `advantage`. """ if isinstance(features, Mapping) and 'state' not in features: raise KeyError("features must include a `state` key.") if (not Modes.is_infer(self.mode) and ('action' not in labels or 'reward' not in labels or 'done' not in labels)): raise KeyError( "labels must include these keys: `action`, `reward`, `done`.") return features, labels
def _build_actions(self):
    """Build the action tensor from the graph's distribution.

    When the model is deterministic or in inference mode, no sampling is
    done:
        * continuous: the distribution's `mean()`;
        * discrete: the argmax of the distribution's `probs` over axis 1.

    Otherwise the distribution is sampled with `sample_shape` set to the
    batch size of `self._graph_results.a`; in the discrete case axis 1 of
    the sample is squeezed out.
    """
    batch_size = get_tensor_batch_size(self._graph_results.a)

    skip_sampling = self.is_deterministic or Modes.is_infer(self.mode)
    if skip_sampling:
        if self.is_continuous:
            return self._graph_results.distribution.mean()
        return tf.argmax(self._graph_results.distribution.probs, axis=1)

    if self.is_continuous:
        return self._graph_results.distribution.sample(sample_shape=batch_size)

    drawn = self._graph_results.distribution.sample(sample_shape=batch_size)
    return tf.squeeze(drawn, axis=1)
def _build(self, features, labels, loss, encoder_fn, decoder_fn, *args, **kwargs):
    """Build the bridge graph for the current mode and return a `BridgeSpec`.

    Behaviour per mode:
        * `Modes.GENERATE`: only `decode` is called, with `features` passed
          both as `incoming` and as `features`.
        * `Modes.ENCODE`: only `encode` is called.
        * any other mode: `features` are encoded, the encoding is decoded,
          and -- unless the mode is an inference mode -- the loss is built
          with `self._build_loss`.

    Args:
        features: input forwarded to `encode` / `decode`.
        labels: forwarded to `encode`, `decode` and `_build_loss`.
        loss: loss argument forwarded untouched to `self._build_loss`.
        encoder_fn: forwarded to `encode`.
        decoder_fn: forwarded to `decode`.
        *args, **kwargs: accepted for interface compatibility; unused here.

    Returns:
        `BridgeSpec` with `results` set, and `losses` / `loss` set to the
        values returned by `_build_loss`, or `None` when no loss is built.
    """
    # The built loss gets its own name: the previous code reset `loss` to
    # None here, so `_build_loss` never received the caller's `loss`.
    losses, built_loss = None, None

    if Modes.GENERATE == self.mode:
        results = self.decode(incoming=features, features=features,
                              labels=labels, decoder_fn=decoder_fn)
    elif Modes.ENCODE == self.mode:
        results = self.encode(features=features, labels=labels,
                              encoder_fn=encoder_fn)
    else:
        encoded = self.encode(features=features, labels=labels,
                              encoder_fn=encoder_fn)
        results = self.decode(features=encoded, labels=labels,
                              decoder_fn=decoder_fn)
        if not Modes.is_infer(self.mode):
            losses, built_loss = self._build_loss(results, features, labels, loss)

    return BridgeSpec(results=results, losses=losses, loss=built_loss)