Ejemplo n.º 1
0
    def _build_actions(self):
        """Build the op that selects actions, optionally with exploration.

        Exploration is skipped when `_build_exploration` returns `None` or when
        the model is in inference mode; the greedy output is then returned as is.

        Returns:
            Continuous case: the `q` output, with the exploration value added
            when exploring.
            Discrete case: the argmax of `q` over axis 1, or a `tf.cond` that
            picks between that argmax and uniformly drawn random action indices.
        """
        batch_size = get_tensor_batch_size(self._train_results.q)
        exploration = self._build_exploration()

        if self.is_continuous:
            if exploration is not None and not Modes.is_infer(self.mode):
                # Exploring: shift the network output by the exploration value.
                return self._train_results.q + exploration
            return self._train_results.q

        # Discrete actions: the greedy index is also stored on the instance.
        self._index_action = tf.argmax(self._train_results.q, axis=1)
        if exploration is None or Modes.is_infer(self.mode):
            return self._index_action

        # Shape of the random-action tensor, built from the batch size.
        random_shape = tf.concat(axis=0, values=[batch_size])
        # One scalar draw decides whether the whole batch explores.
        explore_now = tf.random_uniform((), 0, 1) < exploration
        random_actions = tf.random_uniform(
            random_shape, 0, self.num_actions, tf.int64)
        return tf.cond(
            explore_now,
            lambda: random_actions,
            lambda: self._index_action)
Ejemplo n.º 2
0
    def _build(self,  # pylint: disable=arguments-differ
               features, labels, params=None, config=None):
        """Assemble the model operations and wrap them in an `EstimatorSpec`.

        Outside inference mode the loss and evaluation metrics are built; in
        training mode the train op and the summary op are built as well.
        Predictions and extra ops are built in every mode.

        Args:
            features: model inputs, passed through `_preprocess` first.
            labels: model targets, passed through `_preprocess` first.
            params: not used by this method.
            config: not used by this method.

        Returns:
            An `EstimatorSpec` holding the mode, predictions, loss, extra ops,
            train op and evaluation metric ops (`None` for whatever was not
            built in the current mode).
        """
        features, labels = self._preprocess(features, labels)
        results = self._call_graph_fn(features=features, labels=labels)

        loss = train_op = eval_metrics = None
        if not Modes.is_infer(self.mode):
            # Only the scalar loss (second element) is kept.
            _, loss = self._build_loss(results, features, labels)
            eval_metrics = self._build_eval_metrics(results, features, labels)

            if Modes.is_train(self.mode):
                train_op = self._build_train_op(loss)
                self._build_summary_op(
                    results=results, features=features, labels=labels)

        # Built last in every mode, after any loss/train ops.
        predictions = self._build_predictions(
            results=results, features=features, labels=labels)
        extra_ops = self._build_extra_ops(
            results=results, features=features, labels=labels)

        track(predictions, tf.GraphKeys.PREDICTIONS)

        return EstimatorSpec(
            mode=self.mode,
            predictions=predictions,
            loss=loss,
            extra_ops=extra_ops,
            train_op=train_op,
            eval_metric_ops=eval_metrics)
Ejemplo n.º 3
0
    def _build(self, features, labels=None, params=None, config=None):
        """Assemble the model operations from a `BridgeSpec` graph result.

        Args:
            features: model inputs, passed through `_preprocess` first.
            labels: optional targets, passed through `_preprocess` first.
            params: not used by this method.
            config: not used by this method.

        Returns:
            An `EstimatorSpec` holding the mode, predictions, loss, train op
            and evaluation metric ops (`None` for whatever was not built in
            the current mode).

        Raises:
            ValueError: if the graph function does not return a `BridgeSpec`.
        """
        features, labels = self._preprocess(features, labels)
        results = self._call_graph_fn(features=features, labels=labels)
        if not isinstance(results, BridgeSpec):
            raise ValueError('`bridge_fn` should return a BridgeSpec.')

        loss = train_op = eval_metrics = None
        if not Modes.is_infer(self.mode):
            # NOTE(review): `features` is passed in the labels position for
            # both calls below -- presumably the input doubles as the target;
            # confirm against `_build_loss` / `_build_eval_metrics`.
            # `_build_loss` receives the whole spec, the others `results.results`.
            _, loss = self._build_loss(results, features, features)
            eval_metrics = self._build_eval_metrics(
                results.results, features, features)

            if Modes.is_train(self.mode):
                train_op = self._build_train_op(loss)
                self._build_summary_op(
                    results=results.results, features=features, labels=labels)

        # Built last in every mode, after any loss/train ops.
        predictions = self._build_predictions(
            results=results.results, features=features, labels=labels)

        track(predictions, tf.GraphKeys.PREDICTIONS)

        return EstimatorSpec(
            mode=self.mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metrics)
Ejemplo n.º 4
0
    def _preprocess(self, features, labels):
        """Run the parent preprocessing, then validate the labels.

        Args:
            features: inputs forwarded to the parent `_preprocess`.
            labels: targets forwarded to the parent `_preprocess`. Outside
                inference mode the preprocessed labels must contain a
                `dist_values` key.

        Returns:
            The `(features, labels)` pair produced by the parent `_preprocess`.

        Raises:
            KeyError: if not in inference mode and `dist_values` is missing
                from the labels.
        """
        features, labels = super(TRPOModel, self)._preprocess(features, labels)

        # Labels are only validated outside inference mode.
        if Modes.is_infer(self.mode):
            return features, labels
        if 'dist_values' in labels:
            return features, labels
        raise KeyError("labels must include the keys: `dist_values`.")
Ejemplo n.º 5
0
    def _preprocess(self, features, labels):
        """Validate the keys of `features` and `labels`.

        Args:
            features: model inputs. When it is a `Mapping` it must contain a
                `state` key; other types are not checked.
            labels: targets. Outside inference mode it must contain the
                `action`, `reward` and `done` keys.

        Returns:
            The unchanged `(features, labels)` pair.

        Raises:
            KeyError: if a required key is missing.
        """
        if isinstance(features, Mapping):
            if 'state' not in features:
                raise KeyError("features must include a `state` key.")

        # Labels are only validated outside inference mode.
        if Modes.is_infer(self.mode):
            return features, labels

        for required_key in ('action', 'reward', 'done'):
            if required_key not in labels:
                raise KeyError(
                    "labels must include these keys: `action`, `reward`, `done`.")
        return features, labels
Ejemplo n.º 6
0
    def _build_actions(self):
        """Build the op that selects actions from the graph distribution.

        When the model is deterministic or in inference mode no sampling is
        done: the distribution mean (continuous) or the argmax of its `probs`
        over axis 1 (discrete) is returned. Otherwise the distribution is
        sampled with `sample_shape` set to the batch size, and in the discrete
        case axis 1 of the sample is squeezed.
        """
        batch_size = get_tensor_batch_size(self._graph_results.a)
        greedy = self.is_deterministic or Modes.is_infer(self.mode)
        distribution = self._graph_results.distribution

        if greedy:
            if self.is_continuous:
                return distribution.mean()
            return tf.argmax(distribution.probs, axis=1)

        if self.is_continuous:
            return distribution.sample(sample_shape=batch_size)
        return tf.squeeze(
            distribution.sample(sample_shape=batch_size), axis=1)
Ejemplo n.º 7
0
    def _build(self, features, labels, loss, encoder_fn, decoder_fn, *args,
               **kwargs):
        """Build the bridge for the current mode and return a `BridgeSpec`.

        In `GENERATE` mode only the decoder is run on `features`; in `ENCODE`
        mode only the encoder is run. In every other mode the encoder output
        is fed to the decoder and, outside inference mode, the loss is built.

        Args:
            features: bridge inputs.
            labels: targets, forwarded to `encode`, `decode` and `_build_loss`.
            loss: loss argument forwarded unchanged to `_build_loss`.
            encoder_fn: forwarded to `encode`.
            decoder_fn: forwarded to `decode`.
            *args: not used by this method.
            **kwargs: not used by this method.

        Returns:
            A `BridgeSpec` with the results and, when a loss was built, the
            `losses` / `loss` pair returned by `_build_loss` (otherwise `None`).
        """
        # The outputs get their own names: rebinding `loss` here would
        # discard the caller's argument before it reaches `_build_loss`.
        losses, total_loss = None, None
        if Modes.GENERATE == self.mode:
            # NOTE(review): this call passes `incoming=` while the
            # encode-then-decode path below does not -- confirm `decode`
            # accepts it.
            results = self.decode(incoming=features,
                                  features=features,
                                  labels=labels,
                                  decoder_fn=decoder_fn)
        elif Modes.ENCODE == self.mode:
            results = self.encode(features=features,
                                  labels=labels,
                                  encoder_fn=encoder_fn)
        else:
            x = self.encode(features=features,
                            labels=labels,
                            encoder_fn=encoder_fn)
            results = self.decode(features=x,
                                  labels=labels,
                                  decoder_fn=decoder_fn)
            if not Modes.is_infer(self.mode):
                losses, total_loss = self._build_loss(results, features,
                                                      labels, loss)

        return BridgeSpec(results=results, losses=losses, loss=total_loss)