Ejemplo n.º 1
0
    def _build_actions(self):
        """Build the action tensor, applying the exploration policy when relevant.

        If no exploration is configured (`_build_exploration` returns `None`) or the
        model runs in inference mode, actions are derived from the Q output alone.

        Returns:
            Continuous case: the Q output, plus the exploration term when exploring.
            Discrete case: the argmax over axis 1 of the Q output, or a `tf.cond`
            choosing between uniformly drawn random actions and that argmax.
        """
        q_values = self._train_results.q
        batch_size = get_tensor_batch_size(q_values)
        exploration = self._build_exploration()
        greedy_only = exploration is None or Modes.is_infer(self.mode)

        if self.is_continuous:
            return q_values if greedy_only else q_values + exploration

        # The greedy (argmax) action index is also stored on the instance.
        self._index_action = tf.argmax(q_values, axis=1)
        if greedy_only:
            return self._index_action

        # A single scalar uniform draw is compared against `exploration`, so the
        # predicate handed to `tf.cond` selects one branch for the whole batch.
        random_shape = tf.concat(axis=0, values=[batch_size])
        explore = tf.random_uniform((), 0, 1) < exploration
        random_actions = tf.random_uniform(
            shape=random_shape, minval=0, maxval=self.num_actions, dtype=tf.int64)
        return tf.cond(explore, lambda: random_actions, lambda: self._index_action)
Ejemplo n.º 2
0
    def _build(self, features, labels, params=None, config=None):
        """Assemble the model operations into an `EstimatorSpec`.

        Args:
            features: model inputs, passed through `self._preprocess`.
            labels: model targets, passed through `self._preprocess`.
            params: not used by this method.
            config: not used by this method.

        Returns:
            `EstimatorSpec` carrying predictions and extra ops for every mode, plus
            loss and eval metrics outside inference mode, and a train op in
            training mode (the remaining fields stay `None`).
        """
        features, labels = self._preprocess(features, labels)
        results = self._call_graph_fn(features=features, labels=labels)

        loss = train_op = eval_metrics = None
        if not Modes.is_infer(self.mode):
            # Only the aggregated loss is needed here; the per-loss list is dropped.
            _, loss = self._build_loss(results, features, labels)
            eval_metrics = self._build_eval_metrics(results, features, labels)

            if Modes.is_train(self.mode):
                train_op = self._build_train_op(loss)
                self._build_summary_op(results=results, features=features, labels=labels)

        # Predictions and extra ops are built for every mode, after any loss ops.
        build_kwargs = dict(results=results, features=features, labels=labels)
        predictions = self._build_predictions(**build_kwargs)
        extra_ops = self._build_extra_ops(**build_kwargs)

        track(predictions, tf.GraphKeys.PREDICTIONS)

        return EstimatorSpec(
            mode=self.mode,
            predictions=predictions,
            loss=loss,
            extra_ops=extra_ops,
            train_op=train_op,
            eval_metric_ops=eval_metrics,
        )
Ejemplo n.º 3
0
    def _build(self, features, labels=None, params=None, config=None):
        """Assemble the model operations into an `EstimatorSpec`.

        The graph function is called with `inputs=features` and must return a
        `BridgeSpec`.

        Args:
            features: model inputs, passed through `self._preprocess`.
            labels: optional targets, passed through `self._preprocess`.
            params: not used by this method.
            config: not used by this method.

        Returns:
            `EstimatorSpec` with predictions for every mode, loss and eval metrics
            outside inference mode, and a train op in training mode.

        Raises:
            ValueError: if the graph function does not return a `BridgeSpec`.
        """
        features, labels = self._preprocess(features, labels)
        bridge_spec = self._call_graph_fn(inputs=features)
        if not isinstance(bridge_spec, BridgeSpec):
            raise ValueError('`bridge_fn` should return a BridgeSpec.')

        loss = train_op = eval_metrics = None
        if not Modes.is_infer(self.mode):
            # NOTE(review): `features` is passed in the labels position for the loss
            # and the eval metrics -- presumably the inputs are their own targets;
            # confirm against `_build_loss` / `_build_eval_metrics`.
            _, loss = self._build_loss(bridge_spec, features, features)
            eval_metrics = self._build_eval_metrics(bridge_spec.results, features, features)

            if Modes.is_train(self.mode):
                train_op = self._build_train_op(loss)
                self._build_summary_op(
                    results=bridge_spec.results, features=features, labels=labels)

        # Predictions are built for every mode, after any loss/metric ops.
        predictions = self._build_predictions(
            results=bridge_spec.results, features=features, labels=labels)

        # Register the predictions in the graph collection so that hooks/monitors
        # can easily find them.
        track(predictions, tf.GraphKeys.PREDICTIONS)

        return EstimatorSpec(
            mode=self.mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metrics,
        )
Ejemplo n.º 4
0
    def _build(self, features, labels=None, params=None, config=None):
        """Assemble the model operations into an `EstimatorSpec`.

        The graph function is called with the preprocessed features and labels and
        must return a `BridgeSpec`.

        Args:
            features: model inputs, passed through `self._preprocess`.
            labels: optional targets, passed through `self._preprocess`.
            params: not used by this method.
            config: not used by this method.

        Returns:
            `EstimatorSpec` with predictions for every mode, loss and eval metrics
            outside inference mode, and a train op in training mode.

        Raises:
            ValueError: if the graph function does not return a `BridgeSpec`.
        """
        features, labels = self._preprocess(features, labels)
        spec = self._call_graph_fn(features=features, labels=labels)
        if not isinstance(spec, BridgeSpec):
            raise ValueError('`bridge_fn` should return a BridgeSpec.')

        loss, train_op, eval_metrics = None, None, None
        training_or_eval = not Modes.is_infer(self.mode)
        if training_or_eval:
            # NOTE(review): `features` is passed in the labels position here --
            # presumably the inputs act as their own targets; confirm.
            _, loss = self._build_loss(spec, features, features)
            eval_metrics = self._build_eval_metrics(spec.results, features, features)

            if Modes.is_train(self.mode):
                train_op = self._build_train_op(loss)
                self._build_summary_op(
                    results=spec.results, features=features, labels=labels)

        # Built last in every mode, after any loss/metric/train ops.
        predictions = self._build_predictions(
            results=spec.results, features=features, labels=labels)
        track(predictions, tf.GraphKeys.PREDICTIONS)

        return EstimatorSpec(
            mode=self.mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metrics,
        )
Ejemplo n.º 5
0
    def _build(self,  # pylint: disable=arguments-differ
               features, labels, params=None, config=None):
        """Build the model operations and bundle them into an `EstimatorSpec`.

        Args:
            features: model inputs, passed through `self._preprocess`.
            labels: model targets, passed through `self._preprocess`.
            params: not used by this method.
            config: not used by this method.

        Returns:
            `EstimatorSpec` with predictions and extra ops for every mode; loss and
            eval metrics are set outside inference mode and the train op only in
            training mode.
        """
        features, labels = self._preprocess(features, labels)
        results = self._call_graph_fn(features=features, labels=labels)

        loss, train_op, eval_metrics = None, None, None
        if not Modes.is_infer(self.mode):
            _, loss = self._build_loss(results, features, labels)
            eval_metrics = self._build_eval_metrics(results, features, labels)

            if Modes.is_train(self.mode):
                train_op = self._build_train_op(loss)
                self._build_summary_op(results=results, features=features, labels=labels)

        # Shared tail of both branches: predictions first, then the extra ops.
        common = {'results': results, 'features': features, 'labels': labels}
        predictions = self._build_predictions(**common)
        extra_ops = self._build_extra_ops(**common)

        track(predictions, tf.GraphKeys.PREDICTIONS)

        spec_fields = dict(
            mode=self.mode,
            predictions=predictions,
            loss=loss,
            extra_ops=extra_ops,
            train_op=train_op,
            eval_metric_ops=eval_metrics,
        )
        return EstimatorSpec(**spec_fields)
Ejemplo n.º 6
0
    def _build(self, incoming, loss_config, encoder_fn, decoder_fn, *args, **kwargs):
        """Run the encode/decode steps required by the current mode.

        Args:
            incoming: input handed to `decode` (GENERATE mode) or `encode`
                (every other mode).
            loss_config: forwarded to `self._build_loss` outside inference mode.
            encoder_fn: forwarded to `self.encode`.
            decoder_fn: forwarded to `self.decode`.

        Returns:
            `BridgeSpec` with the mode's results; `losses` and `loss` are filled
            only when encode and decode both ran and the mode is not inference.
        """
        if Modes.GENERATE == self.mode:
            decoded = self.decode(incoming=incoming, decoder_fn=decoder_fn)
            return BridgeSpec(results=decoded, losses=None, loss=None)

        if Modes.ENCODE == self.mode:
            encoded = self.encode(incoming=incoming, encoder_fn=encoder_fn)
            return BridgeSpec(results=encoded, losses=None, loss=None)

        # Any other mode: encode, then decode the encoder output.
        encoded = self.encode(incoming=incoming, encoder_fn=encoder_fn)
        decoded = self.decode(incoming=encoded, decoder_fn=decoder_fn)
        if Modes.is_infer(self.mode):
            return BridgeSpec(results=decoded, losses=None, loss=None)

        losses, loss = self._build_loss(incoming, decoded, loss_config)
        return BridgeSpec(results=decoded, losses=losses, loss=loss)
Ejemplo n.º 7
0
    def _preprocess(self, features, labels):
        """Model specific preprocessing.

        Runs the parent class preprocessing, then checks that `dist_values` is
        present in the labels unless the model is in inference mode.

        Args:
            features: forwarded to the parent `_preprocess`.
            labels: forwarded to the parent `_preprocess`; outside inference mode
                it must contain a `dist_values` key.

        Returns:
            `tuple`: (features, labels) as returned by the parent preprocessing.

        Raises:
            KeyError: if `dist_values` is missing outside inference mode.
        """
        features, labels = super(TRPOModel, self)._preprocess(features, labels)

        # Labels are not validated in inference mode.
        if Modes.is_infer(self.mode):
            return features, labels

        if 'dist_values' not in labels:
            raise KeyError("labels must include the keys: `dist_values`.")
        return features, labels
Ejemplo n.º 8
0
    def _build(self, features, labels, loss_config, encoder_fn, decoder_fn, *args, **kwargs):
        """Run the encode/decode steps required by the current mode.

        Args:
            features: input to `encode`, or to `decode` in GENERATE mode.
            labels: forwarded to `encode`, `decode` and `_build_loss`.
            loss_config: forwarded to `self._build_loss` outside inference mode.
            encoder_fn: forwarded to `self.encode`.
            decoder_fn: forwarded to `self.decode`.

        Returns:
            `BridgeSpec` with the mode's results; `losses` and `loss` are filled
            only when encode and decode both ran and the mode is not inference.
        """
        if Modes.GENERATE == self.mode:
            generated = self.decode(
                incoming=features, features=features, labels=labels, decoder_fn=decoder_fn)
            return BridgeSpec(results=generated, losses=None, loss=None)

        if Modes.ENCODE == self.mode:
            encoded = self.encode(features=features, labels=labels, encoder_fn=encoder_fn)
            return BridgeSpec(results=encoded, losses=None, loss=None)

        # Any other mode: the encoder output becomes the decoder's features.
        encoded = self.encode(features=features, labels=labels, encoder_fn=encoder_fn)
        decoded = self.decode(features=encoded, labels=labels, decoder_fn=decoder_fn)

        losses = loss = None
        if not Modes.is_infer(self.mode):
            losses, loss = self._build_loss(decoded, features, labels, loss_config)
        return BridgeSpec(results=decoded, losses=losses, loss=loss)
Ejemplo n.º 9
0
    def _preprocess(self, features, labels):
        """Model specific preprocessing.

        Runs the parent class preprocessing, then checks that `discount_reward` is
        present in the labels unless the model is in inference mode.

        Args:
            features: forwarded to the parent `_preprocess`.
            labels: forwarded to the parent `_preprocess`; outside inference mode
                it must contain a `discount_reward` key.

        Returns:
            `tuple`: (features, labels) as returned by the parent preprocessing.

        Raises:
            KeyError: if `discount_reward` is missing outside inference mode.
        """
        features, labels = super(BasePGModel, self)._preprocess(features, labels)

        if not Modes.is_infer(self.mode) and 'discount_reward' not in labels:
            # The message previously ended with a stray backtick after the key name.
            raise KeyError("labels must include the keys: `discount_reward`.")
        # TODO: add baseline here.
        return features, labels
Ejemplo n.º 10
0
Archivo: base.py Proyecto: chandu088/p
    def _preprocess(self, features, labels):
        """Model specific preprocessing: validate the input structure.

        Args:
            features: any value; when it is a `Mapping` it must contain a `state` key.
            labels: outside inference mode, a container that must include the keys
                `action`, `reward` and `done`.

        Returns:
            `tuple`: (features, labels), unchanged.

        Raises:
            KeyError: if a mapping `features` lacks `state`, or if a required label
                key is missing outside inference mode.
        """
        missing_state = isinstance(features, Mapping) and 'state' not in features
        if missing_state:
            raise KeyError("features must include a `state` key.")

        # Labels are only validated outside inference mode.
        if not Modes.is_infer(self.mode):
            required_keys = ('action', 'reward', 'done')
            if not all(key in labels for key in required_keys):
                raise KeyError("labels must include these keys: `action`, `reward`, `done`.")
        return features, labels
Ejemplo n.º 11
0
    def _preprocess(self, features, labels):
        """Model specific preprocessing: check the keys of features and labels.

        Args:
            features: any value; when it is a `Mapping` it must contain a `state` key.
            labels: outside inference mode, a container that must include the keys
                `action`, `reward` and `done`.

        Returns:
            `tuple`: (features, labels), unchanged.

        Raises:
            KeyError: if a mapping `features` lacks `state`, or if a required label
                key is missing outside inference mode.
        """
        if isinstance(features, Mapping):
            if 'state' not in features:
                raise KeyError("features must include a `state` key.")

        # Inference mode skips the label check.
        if Modes.is_infer(self.mode):
            return features, labels

        if any(key not in labels for key in ('action', 'reward', 'done')):
            raise KeyError("labels must include these keys: `action`, `reward`, `done`.")
        return features, labels
Ejemplo n.º 12
0
    def _build_actions(self):
        """Create the chosen action, with an exploration policy when applicable.

        In inference mode, or when `_build_exploration` returns `None`, the action
        is taken directly from the Q output without exploration.

        Returns:
            Continuous case: the Q output, plus the exploration term when exploring.
            Discrete case: the argmax over axis 1 of the Q output, or a `tf.cond`
            choosing between uniformly drawn random actions and that argmax.
        """
        q_out = self._train_results.q
        batch_size = get_tensor_batch_size(q_out)
        exploration = self._build_exploration()
        no_exploration = exploration is None or Modes.is_infer(self.mode)

        if self.is_continuous:
            if no_exploration:
                return q_out
            return q_out + exploration

        # Discrete actions: the argmax index is also stored on the instance.
        self._index_action = tf.argmax(q_out, axis=1)
        if no_exploration:
            return self._index_action

        # One scalar uniform sample is compared with `exploration`; the resulting
        # predicate picks either the random or the argmax actions for the batch.
        sample_shape = tf.concat(axis=0, values=[batch_size])
        should_explore = tf.random_uniform((), 0, 1) < exploration
        random_actions = tf.random_uniform(
            shape=sample_shape, minval=0, maxval=self.num_actions, dtype=tf.int64)

        return tf.cond(
            should_explore,
            lambda: random_actions,
            lambda: self._index_action)
Ejemplo n.º 13
0
    def _prepare_input_fn(self, mode, env):
        """Create the input placeholders for the given mode and environment.

        Args:
            mode: Specifies if this training, evaluation or prediction. See `Modes`.
            env: `Environment`. Its `num_states`, `num_actions` and `is_continuous`
                attributes determine the placeholder shapes and dtypes.

        Returns:
            `tuple`: (features, labels).
                    features: `dict`. {state: placeholder}
                    labels: `None` in inference mode; in train/eval modes a `dict`
                        of placeholders keyed by `action`, `reward`,
                        `discount_reward`, `done`, `dist_values`, `tangents`,
                        `theta`, `max_reward`, `min_reward`, `avg_reward` and
                        `total_reward`.
            `None` when `mode` is none of train, eval or infer.

        Raises:
            TypeError: if `env` is not an `Environment` instance.
        """
        if not isinstance(env, Environment):
            message = ("`env` must be an instance of `Environment`, "
                       "got `{}`")
            raise TypeError(message.format(type(env)))

        def float_placeholder(name, shape):
            # All float inputs share the dtype; only the name and shape vary.
            return tf.placeholder(dtype=tf.float32, shape=shape, name=name)

        features = {'state': float_placeholder('state', [None, env.num_states])}

        if Modes.is_train(mode) or Modes.is_eval(mode):
            if env.is_continuous:
                action_dtype = tf.float32
                action_shape = (None, env.num_actions)
                dist_shape = (None, env.num_actions * 2)
            else:
                action_dtype = tf.int64
                action_shape = (None,)
                dist_shape = (None, env.num_actions)

            labels = {
                'action': tf.placeholder(
                    dtype=action_dtype, shape=action_shape, name='action'),
                'reward': float_placeholder('reward', (None,)),
                'discount_reward': float_placeholder('discount_reward', (None,)),
                'done': tf.placeholder(dtype=tf.bool, shape=(None,), name='done'),
                'dist_values': float_placeholder('dist_values', dist_shape),
                'tangents': float_placeholder('tangents', (None,)),
                'theta': float_placeholder('theta', (None,)),

                # Scalar reward statistics.
                'max_reward': float_placeholder('max_reward', ()),
                'min_reward': float_placeholder('min_reward', ()),
                'avg_reward': float_placeholder('avg_reward', ()),
                'total_reward': float_placeholder('total_reward', ()),
            }
            return features, labels

        if Modes.is_infer(mode):
            return features, None
        return None
Ejemplo n.º 14
0
    def _build_actions(self):
        """Create the chosen action, with or without sampling.

        When the model is deterministic or in inference mode, actions are read
        directly from the distribution (its mean, or the argmax of its
        probabilities); otherwise they are sampled from it.

        Returns:
            Continuous case: the distribution mean, or a sample.
            Discrete case: the argmax over axis 1 of the distribution's `probs`,
            or a sample squeezed along axis 1.
        """
        batch_size = get_tensor_batch_size(self._graph_results.a)
        sampling = not (self.is_deterministic or Modes.is_infer(self.mode))
        distribution = self._graph_results.distribution

        if not sampling:
            if self.is_continuous:
                return distribution.mean()
            return tf.argmax(distribution.probs, axis=1)

        if self.is_continuous:
            return distribution.sample(sample_shape=batch_size)

        drawn = distribution.sample(sample_shape=batch_size)
        return tf.squeeze(drawn, axis=1)
Ejemplo n.º 15
0
    def _prepare_input_fn(self, mode, env):
        """Create the input placeholders for the given mode and environment.

        Args:
            mode: Specifies if this training, evaluation or prediction. See `Modes`.
            env: `Environment`. Its `num_states`, `num_actions` and `is_continuous`
                attributes determine the placeholder shapes and dtypes.

        Returns:
            `tuple`: (features, labels).
                    features: `dict`. {state: placeholder}
                    labels: `None` in inference mode; in train/eval modes a `dict`
                        of placeholders keyed by `action`, `reward`, `done`,
                        `max_reward`, `min_reward`, `avg_reward` and
                        `total_reward`.
            `None` when `mode` is none of train, eval or infer.

        Raises:
            TypeError: if `env` is not an `Environment` instance.
        """
        if not isinstance(env, Environment):
            message = ("`env` must be an instance of `Environment`, "
                       "got `{}`")
            raise TypeError(message.format(type(env)))

        state = tf.placeholder(
            dtype=tf.float32, shape=[None, env.num_states], name='state')
        features = {'state': state}

        if Modes.is_train(mode) or Modes.is_eval(mode):
            if env.is_continuous:
                action_dtype, action_shape = tf.float32, (None, env.num_actions)
            else:
                action_dtype, action_shape = tf.int64, (None, )

            def scalar_float(name):
                # Scalar float input, used for the reward statistics.
                return tf.placeholder(dtype=tf.float32, shape=(), name=name)

            labels = {
                'action': tf.placeholder(
                    dtype=action_dtype, shape=action_shape, name='action'),
                'reward': tf.placeholder(
                    dtype=tf.float32, shape=(None, ), name='reward'),
                'done': tf.placeholder(
                    dtype=tf.bool, shape=(None, ), name='done'),
                'max_reward': scalar_float('max_reward'),
                'min_reward': scalar_float('min_reward'),
                'avg_reward': scalar_float('avg_reward'),
                'total_reward': scalar_float('total_reward'),
            }
            return features, labels

        if Modes.is_infer(mode):
            return features, None
        return None
Ejemplo n.º 16
0
    def _build(self, features, labels, params=None, config=None):
        """Build the model operations and bundle them into an `EstimatorSpec`.

        The graph function is called with `inputs=features`.

        Args:
            features: model inputs, passed through `self._preprocess`.
            labels: model targets, passed through `self._preprocess`.
            params: not used by this method.
            config: not used by this method.

        Returns:
            `EstimatorSpec` with predictions and extra ops for every mode; loss and
            eval metrics are set outside inference mode and the train op only in
            training mode.
        """
        features, labels = self._preprocess(features, labels)
        results = self._call_graph_fn(inputs=features)

        loss = train_op = eval_metrics = None
        if not Modes.is_infer(self.mode):
            # Only the aggregated loss is used; the per-loss list is discarded.
            _, loss = self._build_loss(results, features, labels)
            eval_metrics = self._build_eval_metrics(results, features, labels)

            if Modes.is_train(self.mode):
                train_op = self._build_train_op(loss)
                self._build_summary_op(
                    results=results, features=features, labels=labels)

        # Both modes finish with the predictions, then the extra ops.
        predictions = self._build_predictions(
            results=results, features=features, labels=labels)
        extra_ops = self._build_extra_ops(
            results=results, features=features, labels=labels)

        # Register the predictions in the graph collection so that hooks/monitors
        # can easily find them.
        track(predictions, tf.GraphKeys.PREDICTIONS)

        return EstimatorSpec(
            mode=self.mode,
            predictions=predictions,
            loss=loss,
            extra_ops=extra_ops,
            train_op=train_op,
            eval_metric_ops=eval_metrics,
        )
Ejemplo n.º 17
0
    def _build(self, features, labels=None, params=None, config=None):
        """Build the model operations and bundle them into an `EstimatorSpec`.

        The graph function is called with the preprocessed features and labels and
        must return a `BridgeSpec`.

        Args:
            features: model inputs, passed through `self._preprocess`.
            labels: optional targets, passed through `self._preprocess`.
            params: not used by this method.
            config: not used by this method.

        Returns:
            `EstimatorSpec` with predictions for every mode, loss and eval metrics
            outside inference mode, and a train op in training mode.

        Raises:
            ValueError: if the graph function does not return a `BridgeSpec`.
        """
        features, labels = self._preprocess(features, labels)
        bridge = self._call_graph_fn(features=features, labels=labels)
        if not isinstance(bridge, BridgeSpec):
            raise ValueError('`bridge_fn` should return a BridgeSpec.')

        loss = train_op = eval_metrics = None
        if not Modes.is_infer(self.mode):
            # NOTE(review): `features` fills the labels argument of both calls below
            # -- presumably the inputs are their own targets; confirm.
            _, loss = self._build_loss(bridge, features, features)
            eval_metrics = self._build_eval_metrics(
                bridge.results, features, features)

            if Modes.is_train(self.mode):
                train_op = self._build_train_op(loss)
                self._build_summary_op(
                    results=bridge.results, features=features, labels=labels)

        # Predictions come last in every mode.
        predictions = self._build_predictions(
            results=bridge.results, features=features, labels=labels)
        track(predictions, tf.GraphKeys.PREDICTIONS)

        return EstimatorSpec(
            mode=self.mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metrics,
        )
Ejemplo n.º 18
0
    def _build(self, features, labels, loss, encoder_fn, decoder_fn, *args,
               **kwargs):
        """Run the encode/decode steps required by the current mode.

        Args:
            features: input to `encode`, or to `decode` in GENERATE mode.
            labels: forwarded to `encode`, `decode` and `_build_loss`.
            loss: loss configuration forwarded to `self._build_loss` outside
                inference mode.
            encoder_fn: forwarded to `self.encode`.
            decoder_fn: forwarded to `self.decode`.

        Returns:
            `BridgeSpec` with the mode's results; `losses` and `loss` are filled
            only when encode and decode both ran and the mode is not inference.
        """
        # Keep the caller's loss configuration under its own name. The `loss`
        # argument used to be overwritten with `None` before `_build_loss` was
        # called, so the configuration never reached it.
        loss_config = loss
        losses, loss = None, None
        if Modes.GENERATE == self.mode:
            results = self.decode(incoming=features,
                                  features=features,
                                  labels=labels,
                                  decoder_fn=decoder_fn)
        elif Modes.ENCODE == self.mode:
            results = self.encode(features=features,
                                  labels=labels,
                                  encoder_fn=encoder_fn)
        else:
            # Any other mode: the encoder output becomes the decoder's features.
            x = self.encode(features=features,
                            labels=labels,
                            encoder_fn=encoder_fn)
            results = self.decode(features=x,
                                  labels=labels,
                                  decoder_fn=decoder_fn)
            if not Modes.is_infer(self.mode):
                losses, loss = self._build_loss(results, features, labels,
                                                loss_config)

        return BridgeSpec(results=results, losses=losses, loss=loss)