def _build_actions(self): """Create the chosen action with an exploration policy. If inference mode is used the, actions are chosen directly without exploration. """ batch_size = get_tensor_batch_size(self._train_results.q) exploration = self._build_exploration() if self.is_continuous: if exploration is None or Modes.is_infer(self.mode): return self._train_results.q # use exploration return self._train_results.q + exploration else: self._index_action = tf.argmax(self._train_results.q, axis=1) if exploration is None or Modes.is_infer(self.mode): return self._index_action # use exploration exploration_size = tf.concat(axis=0, values=[ batch_size, ]) should_explore = tf.random_uniform((), 0, 1) < exploration random_actions = tf.random_uniform(exploration_size, 0, self.num_actions, tf.int64) return tf.cond(should_explore, lambda: random_actions, lambda: self._index_action)
def _build(self,  # pylint: disable=arguments-differ
           features,
           labels,
           params=None,
           config=None):
    """Build the different operations of the model."""
    # Pre-process features and labels
    features, labels = self._preprocess(features, labels)
    results = self._call_graph_fn(features=features, labels=labels)

    loss = None
    train_op = None
    eval_metrics = None
    if Modes.is_infer(self.mode):
        predictions = self._build_predictions(results=results, features=features, labels=labels)
        extra_ops = self._build_extra_ops(results=results, features=features, labels=labels)
    else:
        _, loss = self._build_loss(results, features, labels)
        eval_metrics = self._build_eval_metrics(results, features, labels)

        if Modes.is_train(self.mode):
            train_op = self._build_train_op(loss)
            self._build_summary_op(results=results, features=features, labels=labels)

        predictions = self._build_predictions(results=results, features=features, labels=labels)
        extra_ops = self._build_extra_ops(results=results, features=features, labels=labels)

    track(predictions, tf.GraphKeys.PREDICTIONS)
    return EstimatorSpec(mode=self.mode,
                         predictions=predictions,
                         loss=loss,
                         extra_ops=extra_ops,
                         train_op=train_op,
                         eval_metric_ops=eval_metrics)
def _build(self, features, labels=None, params=None, config=None):
    # Pre-process features and labels
    features, labels = self._preprocess(features, labels)
    results = self._call_graph_fn(features=features, labels=labels)
    if not isinstance(results, BridgeSpec):
        raise ValueError('`bridge_fn` should return a BridgeSpec.')

    loss = None
    train_op = None
    eval_metrics = None
    if Modes.is_infer(self.mode):
        predictions = self._build_predictions(
            results=results.results, features=features, labels=labels)
    else:
        _, loss = self._build_loss(results, features, features)
        eval_metrics = self._build_eval_metrics(results.results, features, features)

        if Modes.is_train(self.mode):
            train_op = self._build_train_op(loss)
            self._build_summary_op(results=results.results, features=features, labels=labels)

        predictions = self._build_predictions(
            results=results.results, features=features, labels=labels)

    track(predictions, tf.GraphKeys.PREDICTIONS)
    return EstimatorSpec(mode=self.mode,
                         predictions=predictions,
                         loss=loss,
                         train_op=train_op,
                         eval_metric_ops=eval_metrics)
def _preprocess(self, features, labels): """Model specific preprocessing. Args: features: `array`, `Tensor` or `dict`. The environment states. if `dict` it must contain a `state` key. labels: `dict`. A dictionary containing `action`, `reward`, `advantage`. """ features, labels = super(TRPOModel, self)._preprocess(features, labels) if not Modes.is_infer(self.mode) and 'dist_values' not in labels: raise KeyError("labels must include the keys: `dist_values`.") return features, labels
def _call_graph_fn(self, features, labels=None): """Calls graph function. Args: features: `Tensor` or `dict` of tensors labels: `Tensor` or `dict` of tensors """ set_learning_phase(Modes.is_train(self.mode)) kwargs = {} if 'labels' in get_arguments(self._graph_fn): kwargs['labels'] = labels return self._graph_fn(mode=self.mode, features=features, **kwargs)
def get_pipeline_config(mode):
    return TFRecordImagePipelineConfig(
        dynamic_pad=False,
        data_files=train_data_file if Modes.is_train(mode) else eval_data_file,
        meta_data_file=meta_data_filename,
        feature_processors=FeatureProcessorsConfig({
            'image': GraphConfig(
                input_layers=[['image', 0, 0]],
                output_layers=[['image_out', 0, 0]],
                layers=[
                    CastConfig(dtype='float32',
                               name='image_out',
                               inbound_nodes=[['image', 0, 0]])
                ])
        }))
def _preprocess(self, features, labels): """Model specific preprocessing. Args: features: `array`, `Tensor` or `dict`. The environment states. if `dict` it must contain a `state` key. labels: `dict`. A dictionary containing `action`, `reward`, `advantage`. """ if isinstance(features, Mapping) and 'state' not in features: raise KeyError("features must include a `state` key.") if (not Modes.is_infer(self.mode) and ('action' not in labels or 'reward' not in labels or 'done' not in labels)): raise KeyError( "labels must include these keys: `action`, `reward`, `done`.") return features, labels
def _call_graph_fn(self, features, labels=None): """Calls graph function. Creates first one or two graph, i.e. train and target graphs. Return the optimal action given an exploration policy. If `is_dueling` is set to `True`, then another layer is added that represents the state value. Args: inputs: `Tensor` or `dict` of tensors """ set_learning_phase(Modes.is_train(self.mode)) graph_fn = self._build_graph_fn() self._graph_results = graph_fn(mode=self.mode, features=features, labels=labels) return self._build_actions()
def _build_actions(self): """Create the chosen action w/o sampling. If inference mode is used the, actions are chosen directly without sampling. """ batch_size = get_tensor_batch_size(self._graph_results.a) if self.is_deterministic or Modes.is_infer(self.mode): if self.is_continuous: return self._graph_results.distribution.mean() else: return tf.argmax(self._graph_results.distribution.probs, axis=1) else: if self.is_continuous: return self._graph_results.distribution.sample( sample_shape=batch_size) else: return tf.squeeze(self._graph_results.distribution.sample( sample_shape=batch_size), axis=1)
def _call_graph_fn(self, features, labels=None): """Calls graph function. Creates first one or two graph, i.e. train and target graphs. Return the optimal action given an exploration policy. If `is_dueling` is set to `True`, then another layer is added that represents the state value. Args: features: `Tensor` or `dict` of tensors labels: `Tensor` or `dict` of tensors """ set_learning_phase(Modes.is_train(self.mode)) graph_fn = self._build_graph_fn() if self.use_target_graph: # We create 2 graphs: a training graph and a target graph, # so that we can copy one graph to another given a frequency. self._train_graph = FunctionModule(mode=self.mode, build_fn=graph_fn, name='train') self._train_results = self._train_graph(features=features, labels=labels) self._target_graph = FunctionModule(mode=self.mode, build_fn=graph_fn, name='target') self._target_results = self._target_graph(features=features, labels=labels) return self._build_actions() else: self._train_results = graph_fn(mode=self.mode, features=features, labels=labels) self._target_results = self._train_results return self._build_actions()
def _build(self, features, labels, loss, encoder_fn, decoder_fn, *args, **kwargs):
    losses, loss = None, None
    if Modes.GENERATE == self.mode:
        results = self.decode(incoming=features, features=features, labels=labels,
                              decoder_fn=decoder_fn)
    elif Modes.ENCODE == self.mode:
        results = self.encode(features=features, labels=labels, encoder_fn=encoder_fn)
    else:
        x = self.encode(features=features, labels=labels, encoder_fn=encoder_fn)
        results = self.decode(features=x, labels=labels, decoder_fn=decoder_fn)

    if not Modes.is_infer(self.mode):
        losses, loss = self._build_loss(results, features, labels, loss)

    return BridgeSpec(results=results, losses=losses, loss=loss)
def from_config(cls, mode, features, labels, config):  # pylint: disable=arguments-differ
    """Instantiates a Graph container from its config (output of `get_config()`).

    Arguments:
        mode:
        features:
        labels:
        config: Model config dictionary.

    Returns:
        A model instance.

    Raises:
        ValueError: In case of improperly formatted config dict.
    """
    # set the training mode
    set_learning_phase(Modes.is_train(mode))

    if not isinstance(config, GraphConfig):
        config = GraphConfig.from_dict(config)

    # layer instances created during the graph reconstruction process
    created_layers = {}

    # Create an input layer based on the defined inputs and features
    for layer in config.input_layers:
        layer_name, node_index, tensor_index = cls.get_node_data(layer)
        if layer_name in features:
            created_layers[layer_name] = InputLayer(
                input_tensor=features[layer_name], name=layer_name)
        elif isinstance(labels, Mapping) and layer_name in labels:
            created_layers[layer_name] = InputLayer(
                input_tensor=labels[layer_name], name=layer_name)
        else:
            raise ConfigurationError("Input `{}` is not found".format(layer_name))

    def process_layer(layer):
        """Deserialize a layer, then call it on appropriate inputs.

        Arguments:
            layer: layer config.

        Raises:
            ValueError: In case of improperly formatted `layer` config.
        """
        layer_class = layer.IDENTIFIER
        layer_name = layer.name

        # Instantiate layer.
        if layer_class in LAYERS:
            created_layer = LAYERS[layer_class].from_config(layer)
        elif layer_class in IMAGE_PROCESSORS:
            created_layer = IMAGE_PROCESSORS[layer_class].from_config(layer)
        else:
            raise ValueError("The layer `{}` is not supported.".format(layer_class))
        created_layers[layer_name] = created_layer

        # Gather layer inputs.
        inbound_nodes_data = layer.inbound_nodes
        input_tensors = []
        for input_data in inbound_nodes_data:
            in_layer_name, in_node_index, in_tensor_index = cls.get_node_data(input_data)
            if len(input_data) == 3:
                kwargs = {}
            elif len(input_data) == 4:
                kwargs = input_data[3]
            else:
                raise ValueError('Improperly formatted model config.')
            if in_layer_name not in created_layers:
                raise ValueError('Missing layer: ' + in_layer_name)
            inbound_layer = created_layers[in_layer_name]
            inbound_node = inbound_layer.inbound_nodes[in_node_index]
            input_tensors.append(inbound_node.output_tensors[in_tensor_index])

        # Call layer on its inputs, thus creating the node
        # and building the layer if needed.
        if input_tensors:
            if len(input_tensors) == 1:
                created_layer(input_tensors[0], **kwargs)
            else:
                created_layer(input_tensors, **kwargs)

    for layer in config.layers:
        process_layer(layer)

    name = config.name
    input_tensors = []
    output_tensors = []
    for layer_data in config.input_layers:
        layer_name, node_index, tensor_index = cls.get_node_data(layer_data)
        assert layer_name in created_layers, "Layer `{}` not found".format(layer_name)
        layer = created_layers[layer_name]
        layer_output_tensors = layer.inbound_nodes[node_index].output_tensors
        input_tensors.append(layer_output_tensors[tensor_index])
    for layer_data in config.output_layers:
        layer_name, node_index, tensor_index = cls.get_node_data(layer_data)
        assert layer_name in created_layers
        layer = created_layers[layer_name]
        layer_output_tensors = layer.inbound_nodes[node_index].output_tensors
        output_tensors.append(layer_output_tensors[tensor_index])
    return cls(inputs=input_tensors, outputs=output_tensors, name=name)
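A hedged usage sketch of `from_config`: it reuses the `GraphConfig` layout shown in `get_pipeline_config` above, where every reference is a `[layer_name, node_index, tensor_index]` triple. The container class name `Graph`, the `Modes.TRAIN` constant, and the placeholder tensor are assumptions for illustration, not verified library API.

# Sketch: rebuilding a tiny graph from a config, following the
# [layer_name, node_index, tensor_index] convention used above.
# `Graph` (assumed container class name) and `Modes.TRAIN` (assumed
# constant) are illustrative; GraphConfig/CastConfig mirror the example
# pipeline config earlier in this section.
import tensorflow as tf

config = GraphConfig(
    input_layers=[['image', 0, 0]],
    output_layers=[['image_out', 0, 0]],
    layers=[
        CastConfig(dtype='float32',
                   name='image_out',
                   inbound_nodes=[['image', 0, 0]])
    ])

features = {'image': tf.placeholder(tf.uint8, [None, 28, 28, 1], name='image')}
graph = Graph.from_config(mode=Modes.TRAIN, features=features, labels=None, config=config)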