Beispiel #1
0
    def _build_actions(self):
        """Build the op that yields the chosen action, with optional exploration.

        In inference mode, or when `_build_exploration` returns `None`, the
        action is derived directly from the train graph's `q` output without
        any exploration.

        Returns:
            For continuous actions: `q`, or `q + exploration` when exploring.
            For discrete actions: the argmax over axis 1 of `q` (also stored in
            `self._index_action`), or a `tf.cond` switching between random
            actions and that argmax when exploring.
        """
        q_values = self._train_results.q
        batch_size = get_tensor_batch_size(q_values)
        noise = self._build_exploration()

        if not self.is_continuous:
            greedy = tf.argmax(q_values, axis=1)
            # Stored on the instance in every discrete case, exploring or not.
            self._index_action = greedy
            if noise is None or Modes.is_infer(self.mode):
                return greedy

            # A single scalar draw decides for the whole batch whether the
            # random actions or the greedy ones are returned.
            sample_shape = tf.concat(values=[batch_size], axis=0)
            explore_now = tf.random_uniform((), 0, 1) < noise
            random_choice = tf.random_uniform(
                sample_shape, 0, self.num_actions, tf.int64)
            return tf.cond(explore_now,
                           lambda: random_choice,
                           lambda: self._index_action)

        if noise is None or Modes.is_infer(self.mode):
            return q_values

        # Continuous case: the exploration term is added to the raw output.
        return q_values + noise
Beispiel #2
0
    def _build(self,  # pylint: disable=arguments-differ
               features, labels, params=None, config=None):
        """Assemble the model operations and wrap them in an `EstimatorSpec`.

        Loss and eval metrics are only built outside inference mode; the train
        op and the summary op are only built in train mode. Predictions and
        extra ops are always built, after everything else.

        Args:
            features: model inputs, passed through `self._preprocess` first.
            labels: model targets, passed through `self._preprocess` first.
            params: unused here.
            config: unused here.

        Returns:
            An `EstimatorSpec` for the current mode.
        """
        features, labels = self._preprocess(features, labels)
        results = self._call_graph_fn(features=features, labels=labels)

        loss, train_op, eval_metrics = None, None, None
        if not Modes.is_infer(self.mode):
            _, loss = self._build_loss(results, features, labels)
            eval_metrics = self._build_eval_metrics(results, features, labels)

            if Modes.is_train(self.mode):
                train_op = self._build_train_op(loss)
                self._build_summary_op(results=results, features=features, labels=labels)

        # Built last in every mode, so the creation order matches both
        # branches of the previous if/else layout.
        predictions = self._build_predictions(results=results, features=features, labels=labels)
        extra_ops = self._build_extra_ops(results=results, features=features, labels=labels)

        track(predictions, tf.GraphKeys.PREDICTIONS)

        spec_kwargs = dict(mode=self.mode,
                           predictions=predictions,
                           loss=loss,
                           extra_ops=extra_ops,
                           train_op=train_op,
                           eval_metric_ops=eval_metrics)
        return EstimatorSpec(**spec_kwargs)
Beispiel #3
0
    def _build(self, features, labels=None, params=None, config=None):
        """Assemble the model operations from a `BridgeSpec` graph result.

        Args:
            features: model inputs, passed through `self._preprocess` first.
            labels: model targets, passed through `self._preprocess` first.
            params: unused here.
            config: unused here.

        Returns:
            An `EstimatorSpec` for the current mode.

        Raises:
            ValueError: if the graph function does not return a `BridgeSpec`.
        """
        features, labels = self._preprocess(features, labels)
        bridge_spec = self._call_graph_fn(features=features, labels=labels)
        if not isinstance(bridge_spec, BridgeSpec):
            raise ValueError('`bridge_fn` should return a BridgeSpec.')

        loss, train_op, eval_metrics = None, None, None
        if not Modes.is_infer(self.mode):
            # NOTE(review): `features` is passed in the labels position for
            # both the loss and the eval metrics - presumably the model is
            # scored against its own input; confirm this is intended.
            _, loss = self._build_loss(bridge_spec, features, features)
            eval_metrics = self._build_eval_metrics(bridge_spec.results, features, features)

            if Modes.is_train(self.mode):
                train_op = self._build_train_op(loss)
                self._build_summary_op(
                    results=bridge_spec.results, features=features, labels=labels)

        # Predictions are built in every mode, after loss/metrics/train op.
        predictions = self._build_predictions(
            results=bridge_spec.results, features=features, labels=labels)

        track(predictions, tf.GraphKeys.PREDICTIONS)

        spec_kwargs = dict(mode=self.mode,
                           predictions=predictions,
                           loss=loss,
                           train_op=train_op,
                           eval_metric_ops=eval_metrics)
        return EstimatorSpec(**spec_kwargs)
Beispiel #4
0
    def _preprocess(self, features, labels):
        """Run the parent preprocessing, then require `dist_values` in labels.

        Args:
            features: `array`, `Tensor` or `dict`. The environment states.
            labels: `dict`. Outside inference mode it must contain a
                `dist_values` key, in addition to whatever the parent
                `_preprocess` requires.

        Returns:
            The `(features, labels)` pair returned by the parent `_preprocess`.

        Raises:
            KeyError: if not in inference mode and `dist_values` is missing.
        """
        features, labels = super(TRPOModel, self)._preprocess(features, labels)

        # Nothing further to validate in inference mode or when the key exists.
        if Modes.is_infer(self.mode) or 'dist_values' in labels:
            return features, labels

        raise KeyError("labels must include the keys: `dist_values`.")
Beispiel #5
0
    def _call_graph_fn(self, features, labels=None):
        """Set the learning phase and invoke the graph function.

        `labels` is only forwarded when the graph function declares a
        `labels` argument.

        Args:
            features: `Tensor` or `dict` of tensors
            labels: `Tensor` or `dict` of tensors

        Returns:
            Whatever `self._graph_fn` returns.
        """
        is_training = Modes.is_train(self.mode)
        set_learning_phase(is_training)

        accepts_labels = 'labels' in get_arguments(self._graph_fn)
        if accepts_labels:
            return self._graph_fn(mode=self.mode, features=features, labels=labels)
        return self._graph_fn(mode=self.mode, features=features)
Beispiel #6
0
 def get_pipeline_config(mode):
     """Return the TFRecord image pipeline config for the given mode.

     The train data file is used in train mode and the eval data file
     otherwise. The `image` feature goes through a one-layer graph that
     casts it to `float32` and exposes it as `image_out`.
     """
     if Modes.is_train(mode):
         data_files = train_data_file
     else:
         data_files = eval_data_file

     # Single cast layer fed by the `image` input.
     cast_layer = CastConfig(dtype='float32',
                             name='image_out',
                             inbound_nodes=[['image', 0, 0]])
     image_graph = GraphConfig(input_layers=[['image', 0, 0]],
                               output_layers=[['image_out', 0, 0]],
                               layers=[cast_layer])
     processors = FeatureProcessorsConfig({'image': image_graph})

     return TFRecordImagePipelineConfig(dynamic_pad=False,
                                        data_files=data_files,
                                        meta_data_file=meta_data_filename,
                                        feature_processors=processors)
Beispiel #7
0
    def _preprocess(self, features, labels):
        """Model specific preprocessing.

        Args:
            features: `array`, `Tensor` or `dict`. The environment states.
                if `dict` it must contain a `state` key.
            labels: `dict`. A dictionary containing `action`, `reward`, `advantage`.
        """
        if isinstance(features, Mapping) and 'state' not in features:
            raise KeyError("features must include a `state` key.")

        if (not Modes.is_infer(self.mode)
                and ('action' not in labels or 'reward' not in labels
                     or 'done' not in labels)):
            raise KeyError(
                "labels must include these keys: `action`, `reward`, `done`.")
        return features, labels
Beispiel #8
0
    def _call_graph_fn(self, features, labels=None):
        """Run the graph function and build the actions from its results.

        The learning phase is set from the current mode, the graph function
        returned by `_build_graph_fn` is called, and its output is stored in
        `self._graph_results` before the actions are built.

        Args:
            features: `Tensor` or `dict` of tensors
            labels: `Tensor` or `dict` of tensors

        Returns:
            The result of `self._build_actions()`.
        """
        is_training = Modes.is_train(self.mode)
        set_learning_phase(is_training)

        build_graph = self._build_graph_fn()
        self._graph_results = build_graph(
            mode=self.mode, features=features, labels=labels)

        actions = self._build_actions()
        return actions
Beispiel #9
0
    def _build_actions(self):
        """Build the op that yields the chosen action, sampled or not.

        When the model is deterministic or in inference mode no sampling is
        done: the distribution mean is returned for continuous actions and
        the argmax over axis 1 of the distribution's `probs` otherwise.
        In all other cases the action is sampled from the distribution.
        """
        batch_size = get_tensor_batch_size(self._graph_results.a)
        distribution = self._graph_results.distribution

        if self.is_deterministic or Modes.is_infer(self.mode):
            if self.is_continuous:
                return distribution.mean()
            return tf.argmax(distribution.probs, axis=1)

        sampled = distribution.sample(sample_shape=batch_size)
        if self.is_continuous:
            return sampled
        # Discrete samples have axis 1 squeezed out before being returned.
        return tf.squeeze(sampled, axis=1)
Beispiel #10
0
    def _call_graph_fn(self, features, labels=None):
        """Build the train (and optionally target) graph, then the actions.

        With `use_target_graph` set, two `FunctionModule`s named `train` and
        `target` are created from the same graph function and both are called
        on the inputs. Otherwise the graph function is called once and its
        results serve as both train and target results.

        Args:
            features: `Tensor` or `dict` of tensors
            labels: `Tensor` or `dict` of tensors

        Returns:
            The result of `self._build_actions()`.
        """
        set_learning_phase(Modes.is_train(self.mode))

        graph_fn = self._build_graph_fn()

        if not self.use_target_graph:
            # Single graph: the target results alias the train results.
            self._train_results = graph_fn(
                mode=self.mode, features=features, labels=labels)
            self._target_results = self._train_results
            return self._build_actions()

        # Two separately named modules built from the same function; the
        # train module is created and called before the target one.
        self._train_graph = FunctionModule(
            mode=self.mode, build_fn=graph_fn, name='train')
        self._train_results = self._train_graph(
            features=features, labels=labels)

        self._target_graph = FunctionModule(
            mode=self.mode, build_fn=graph_fn, name='target')
        self._target_results = self._target_graph(
            features=features, labels=labels)

        return self._build_actions()
Beispiel #11
0
    def _build(self, features, labels, loss, encoder_fn, decoder_fn, *args,
               **kwargs):
        """Build the bridge output for the current mode.

        * `Modes.GENERATE`: only the decoder is run, fed with `features`.
        * `Modes.ENCODE`: only the encoder is run.
        * any other mode: `features` are encoded, the encoding is decoded,
          and - unless the mode is an inference mode - the loss is built.

        Args:
            features: inputs given to the encoder (or to the decoder in
                generate mode).
            labels: forwarded to `encode`, `decode` and `_build_loss`.
            loss: loss definition forwarded to `self._build_loss`.
            encoder_fn: forwarded to `self.encode`.
            decoder_fn: forwarded to `self.decode`.
            *args: accepted for signature compatibility; unused here.
            **kwargs: accepted for signature compatibility; unused here.

        Returns:
            A `BridgeSpec` holding the results and, when a loss was built,
            the `losses` and `loss` returned by `_build_loss` (both `None`
            otherwise).
        """
        # Keep the outputs under their own names: rebinding `loss` here would
        # throw away the caller's loss definition before `_build_loss` could
        # receive it (it would always be handed `None`).
        losses, total_loss = None, None
        if Modes.GENERATE == self.mode:
            results = self.decode(incoming=features,
                                  features=features,
                                  labels=labels,
                                  decoder_fn=decoder_fn)
        elif Modes.ENCODE == self.mode:
            results = self.encode(features=features,
                                  labels=labels,
                                  encoder_fn=encoder_fn)
        else:
            x = self.encode(features=features,
                            labels=labels,
                            encoder_fn=encoder_fn)
            results = self.decode(features=x,
                                  labels=labels,
                                  decoder_fn=decoder_fn)
            if not Modes.is_infer(self.mode):
                losses, total_loss = self._build_loss(results, features,
                                                      labels, loss)

        return BridgeSpec(results=results, losses=losses, loss=total_loss)
Beispiel #12
0
    def from_config(cls, mode, features, labels, config):  # pylint: disable=arguments-differ
        """Instantiates a Graph container from its config (output of `get_config()`).

        The input layers are created first from the tensors found in
        `features` (or `labels`), then every configured layer is instantiated
        and called on its inbound tensors, and finally the graph's input and
        output tensors are collected and handed to the class constructor.

        Arguments:
            mode: The current mode; used to set the learning phase.
            features: Container looked up by input layer name (`name in features`,
                `features[name]`).
            labels: Optional mapping used as a fallback lookup for input layers
                whose name is not in `features`; ignored unless it is a `Mapping`.
            config: A `GraphConfig`, or a model config dictionary that is
                converted with `GraphConfig.from_dict`.

        Returns:
            A model instance built with
            `cls(inputs=..., outputs=..., name=config.name)`.

        Raises:
            ConfigurationError: If an input layer name is found neither in
                `features` nor in `labels`.
            ValueError: In case of improperly formatted config dict: unsupported
                layer class, inbound node entry whose length is not 3 or 4,
                or an inbound layer that has not been created yet.
            AssertionError: If an input or output layer name was not created
                (only when assertions are enabled).
        """
        # Set the learning phase from the mode (training vs. not training).
        set_learning_phase(Modes.is_train(mode))

        # Accept plain dictionaries as well as GraphConfig instances.
        if not isinstance(config, GraphConfig):
            config = GraphConfig.from_dict(config)

        # Layer instances created during the graph reconstruction process,
        # keyed by layer name. `process_layer` below adds to this dict.
        created_layers = {}

        # Create an input layer based on the defined inputs and features.
        # Only the layer name is used here; node_index and tensor_index are
        # unpacked but not needed in this loop.
        for layer in config.input_layers:
            layer_name, node_index, tensor_index = cls.get_node_data(layer)
            if layer_name in features:
                created_layers[layer_name] = InputLayer(
                    input_tensor=features[layer_name], name=layer_name)
            elif isinstance(labels, Mapping) and layer_name in labels:
                created_layers[layer_name] = InputLayer(
                    input_tensor=labels[layer_name], name=layer_name)
            else:
                raise ConfigurationError("Input `{}`is not found".format(layer_name))

        def process_layer(layer):
            """Deserialize a layer, then call it on appropriate inputs.

            The created layer is registered in `created_layers` under its name
            before its inputs are gathered.

            Arguments:
                layer: layer config object exposing `IDENTIFIER`, `name` and
                    `inbound_nodes`.

            Raises:
                ValueError: If the layer class is in neither `LAYERS` nor
                    `IMAGE_PROCESSORS`, if an inbound node entry does not have
                    3 or 4 elements, or if an inbound layer is missing from
                    `created_layers`.
            """
            layer_class = layer.IDENTIFIER
            layer_name = layer.name

            # Instantiate layer from whichever registry knows its identifier.
            if layer_class in LAYERS:
                created_layer = LAYERS[layer_class].from_config(layer)
            elif layer_class in IMAGE_PROCESSORS:
                created_layer = IMAGE_PROCESSORS[layer_class].from_config(layer)
            else:
                raise ValueError("The layer `{}` is not supported.".format(layer_class))
            created_layers[layer_name] = created_layer

            # Gather layer inputs.
            inbound_nodes_data = layer.inbound_nodes
            input_tensors = []
            for input_data in inbound_nodes_data:
                in_layer_name, in_node_index, in_tensor_index = cls.get_node_data(input_data)
                # A 3-element entry carries no call kwargs; a 4-element entry
                # carries them as its last element.
                # NOTE(review): `kwargs` is rebound on every iteration, so the
                # call below only sees the kwargs of the last inbound entry.
                if len(input_data) == 3:
                    kwargs = {}
                elif len(input_data) == 4:
                    kwargs = input_data[3]
                else:
                    raise ValueError('Improperly formatted model config.')
                # Inbound layers must already exist, so layers have to be
                # listed after the layers they depend on.
                if in_layer_name not in created_layers:
                    raise ValueError('Missing layer: ' + in_layer_name)
                inbound_layer = created_layers[in_layer_name]
                inbound_node = inbound_layer.inbound_nodes[in_node_index]
                input_tensors.append(inbound_node.output_tensors[in_tensor_index])
            # Call layer on its inputs, thus creating the node
            # and building the layer if needed.
            # A layer without inbound tensors is instantiated but never called.
            if input_tensors:
                # A single input is passed as a bare tensor, several as a list.
                if len(input_tensors) == 1:
                    created_layer(input_tensors[0], **kwargs)
                else:
                    created_layer(input_tensors, **kwargs)

        # Layers are processed in config order.
        for layer in config.layers:
            process_layer(layer)

        # Collect the tensors that form the graph's inputs and outputs.
        name = config.name
        input_tensors = []
        output_tensors = []
        for layer_data in config.input_layers:
            layer_name, node_index, tensor_index = cls.get_node_data(layer_data)
            # NOTE(review): asserts are skipped under `python -O`.
            assert layer_name in created_layers, "Layer `{}` not found".format(layer_name)
            layer = created_layers[layer_name]
            layer_output_tensors = layer.inbound_nodes[node_index].output_tensors
            input_tensors.append(layer_output_tensors[tensor_index])
        for layer_data in config.output_layers:
            layer_name, node_index, tensor_index = cls.get_node_data(layer_data)
            assert layer_name in created_layers
            layer = created_layers[layer_name]
            layer_output_tensors = layer.inbound_nodes[node_index].output_tensors
            output_tensors.append(layer_output_tensors[tensor_index])
        return cls(inputs=input_tensors, outputs=output_tensors, name=name)