예제 #1
0
        def value_function(self):
            """Build the value-function output for the current model instance.

            When ``vf_share_layers`` is set, a single linear unit is placed on
            the current instance's last layer. Otherwise a second
            ``legacy_model_cls`` network with one output is built from the
            same input dict, with ``free_log_std`` and ``use_lstm`` forced
            off.

            Returns:
                The value output passed through ``tf.reshape(..., [-1])``.

            Raises:
                AssertionError: If ``self.cur_instance`` is falsy.
            """
            assert self.cur_instance, "must call forward first"

            with self._branch_variable_scope("value_function"):
                if self.model_config["vf_share_layers"]:
                    # Shared feature layer: one linear head on top of it.
                    shared_head = linear(
                        self.cur_instance.last_layer, 1, "value_function",
                        normc_initializer(1.0))
                    return tf.reshape(shared_head, [-1])

                # Separate branch: work on a copy so the overrides below do
                # not modify self.model_config.
                vf_config = self.model_config.copy()
                vf_config["free_log_std"] = False
                if vf_config["use_lstm"]:
                    # The branch network is built without RNN state.
                    vf_config["use_lstm"] = False
                    logger.warning(
                        "It is not recommended to use a LSTM model with "
                        "vf_share_layers=False (consider setting it to True). "
                        "If you want to not share layers, you can implement "
                        "a custom LSTM model that overrides the "
                        "value_function() method.")

                vf_model = self.legacy_model_cls(
                    self.cur_instance.input_dict,
                    self.obs_space,
                    self.action_space,
                    1,
                    vf_config,
                    state_in=None,
                    seq_lens=None)
                return tf.reshape(vf_model.outputs, [-1])
예제 #2
0
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Build an LSTM layer followed by a linear output layer.

        Args:
            input_dict: Dict of input tensors. ``"obs"`` is always read;
                ``"prev_actions"`` and ``"prev_rewards"`` are read only when
                ``options["lstm_use_prev_action_reward"]`` is truthy.
            num_outputs: Number of units of the final linear layer.
            options: Model options. ``"lstm_cell_size"`` gives the LSTM cell
                size and ``"lstm_use_prev_action_reward"`` toggles the
                concatenation of previous actions/rewards onto the
                observations.

        Returns:
            Tuple ``(logits, last_layer)`` where ``last_layer`` is the LSTM
            output reshaped to ``[-1, cell_size]`` and ``logits`` is the
            linear layer applied to it.

        Side effects:
            Sets ``self.state_init`` and ``self.state_out``, and sets
            ``self.state_in`` when it was not provided.
        """
        # Hard deprecate this class. All Models should use the ModelV2
        # API from here on.
        deprecation_warning("Model->LSTM", "RecurrentNetwork", error=False)

        cell_size = options.get("lstm_cell_size")
        if options.get("lstm_use_prev_action_reward"):
            # Flatten every non-batch dimension of the previous actions.
            # np.prod is used because the np.product alias was removed in
            # NumPy 2.0.
            action_dim = int(
                np.prod(
                    input_dict["prev_actions"].get_shape().as_list()[1:]))
            features = tf.concat(
                [
                    input_dict["obs"],
                    tf.reshape(
                        tf.cast(input_dict["prev_actions"], tf.float32),
                        [-1, action_dim]),
                    tf.reshape(input_dict["prev_rewards"], [-1, 1]),
                ],
                axis=1)
        else:
            features = input_dict["obs"]
        # NOTE(review): presumably turns the flat batch into padded
        # [batch, time, ...] sequences -- confirm against add_time_dimension.
        last_layer = add_time_dimension(features, self.seq_lens)

        # Setup the LSTM cell
        lstm = tf1.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True)
        self.state_init = [
            np.zeros(lstm.state_size.c, np.float32),
            np.zeros(lstm.state_size.h, np.float32)
        ]

        # Setup LSTM inputs: reuse caller-provided state tensors if present,
        # otherwise create placeholders for the cell and hidden state.
        if self.state_in:
            c_in, h_in = self.state_in
        else:
            c_in = tf1.placeholder(
                tf.float32, [None, lstm.state_size.c], name="c")
            h_in = tf1.placeholder(
                tf.float32, [None, lstm.state_size.h], name="h")
            self.state_in = [c_in, h_in]

        # Setup LSTM outputs
        state_in = tf1.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
        lstm_out, lstm_state = tf1.nn.dynamic_rnn(
            lstm,
            last_layer,
            initial_state=state_in,
            sequence_length=self.seq_lens,
            time_major=False,
            dtype=tf.float32)

        self.state_out = list(lstm_state)

        # Compute outputs: fold the time dimension back into the batch
        # dimension before applying the final linear layer.
        last_layer = tf.reshape(lstm_out, [-1, cell_size])
        logits = linear(last_layer, num_outputs, "action",
                        normc_initializer(0.01))
        return logits, last_layer
예제 #3
0
파일: model.py 프로젝트: xiaming9880/ray
    def value_function(self):
        """Construct the value-function head on top of ``self.last_layer``.

        Subclasses may override this method to build the value function
        differently (for example, without sharing hidden layers).

        Returns:
            Tensor of size [BATCH_SIZE] for the value function.
        """
        # A single linear unit, then flattened with a [-1] reshape.
        value_head = linear(
            self.last_layer, 1, "value", normc_initializer(1.0))
        return tf.reshape(value_head, [-1])
예제 #4
0
        def value_function(self):
            """Build the value-function output for the current model instance.

            All variables are created under ``self.variable_scope`` and a
            nested ``"value_function"`` scope opened with ``AUTO_REUSE``.
            When ``vf_share_layers`` is set, a single linear unit is placed
            on the current instance's last layer. Otherwise a second
            ``legacy_model_cls`` network with one output is built from the
            same input dict, with ``free_log_std`` and ``use_lstm`` forced
            off.

            Returns:
                The value output passed through ``tf.reshape(..., [-1])``.

            Raises:
                AssertionError: If ``self.cur_instance`` is None.
            """
            assert self.cur_instance is not None, "must call forward first"

            with tf1.variable_scope(self.variable_scope), \
                    tf1.variable_scope("value_function",
                                       reuse=tf1.AUTO_REUSE):
                if self.model_config["vf_share_layers"]:
                    # Shared feature layer: one linear head on top of it.
                    shared_head = linear(
                        self.cur_instance.last_layer, 1, "value_function",
                        normc_initializer(1.0))
                    return tf.reshape(shared_head, [-1])

                # Separate branch: work on a copy so the overrides below do
                # not modify self.model_config.
                vf_config = self.model_config.copy()
                vf_config["free_log_std"] = False
                vf_obs_space = self.obs_space

                if vf_config["use_lstm"]:
                    vf_config["use_lstm"] = False
                    logger.warning(
                        "It is not recommended to use an LSTM model "
                        "with the `vf_share_layers=False` option. "
                        "If you want to use separate policy- and vf-"
                        "networks with LSTMs, you can implement a custom "
                        "LSTM model that overrides the value_function() "
                        "method. "
                        "NOTE: Your policy- and vf-NNs will use the same "
                        "shared LSTM!")
                    # Drop `original_space` from a shallow copy of the obs
                    # space so that no preprocessing is triggered (the input
                    # to the vf network is already vectorized LSTM output).
                    vf_obs_space = copy.copy(self.obs_space)
                    if hasattr(vf_obs_space, "original_space"):
                        delattr(vf_obs_space, "original_space")

                vf_model = self.legacy_model_cls(
                    self.cur_instance.input_dict,
                    vf_obs_space,
                    self.action_space,
                    1,
                    vf_config,
                    state_in=None,
                    seq_lens=None)
                return tf.reshape(vf_model.outputs, [-1])
예제 #5
0
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Build a small LSTM whose inputs and states are captured by a spy op.

        A ``tf.py_func`` op pickles the sequences, input/output state and
        sequence lengths of each multi-element batch into Ray's internal KV
        store under the key ``rnn_spy_in_<index>``. The output layers are
        created under a control dependency on that op.

        Args:
            input_dict: Dict of input tensors; only ``"obs"`` is read.
            num_outputs: Number of units of the final linear layer.
            options: Unused by this method.

        Returns:
            Tuple ``(logits, last_layer)`` where ``last_layer`` is the LSTM
            output reshaped to ``[-1, 3]``.
        """
        # Previously, a new class object was created during
        # deserialization and this `capture_index`
        # variable would be refreshed between class instantiations.
        # This behavior is no longer the case, so we manually refresh
        # the variable.
        RNNSpyModel.capture_index = 0

        def spy(sequences, state_in, state_out, seq_lens):
            """Store one captured batch in the KV store; always returns 0."""
            if len(sequences) == 1:
                return 0  # don't capture inference inputs
            # TF runs this function in an isolated context, so we have to use
            # redis to communicate back to our suite
            capture_key = "rnn_spy_in_{}".format(RNNSpyModel.capture_index)
            payload = pickle.dumps({
                "sequences": sequences,
                "state_in": state_in,
                "state_out": state_out,
                "seq_lens": seq_lens
            })
            ray.experimental.internal_kv._internal_kv_put(
                capture_key, payload, overwrite=True)
            RNNSpyModel.capture_index += 1
            return 0

        cell_size = 3
        obs = input_dict["obs"]
        rnn_input = add_time_dimension(obs, self.seq_lens)

        # LSTM cell and its zero-valued initial state.
        cell = tf.nn.rnn_cell.BasicLSTMCell(cell_size, state_is_tuple=True)
        self.state_init = [
            np.zeros(size, np.float32)
            for size in (cell.state_size.c, cell.state_size.h)
        ]

        # LSTM state inputs: create placeholders unless state tensors were
        # already provided.
        if not self.state_in:
            c_in = tf.placeholder(
                tf.float32, [None, cell.state_size.c], name="c")
            h_in = tf.placeholder(
                tf.float32, [None, cell.state_size.h], name="h")
        else:
            c_in, h_in = self.state_in
        self.state_in = [c_in, h_in]

        # Run the LSTM over the sequences.
        initial_state = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
        rnn_out, rnn_state = tf.nn.dynamic_rnn(
            cell,
            rnn_input,
            initial_state=initial_state,
            sequence_length=self.seq_lens,
            time_major=False,
            dtype=tf.float32)
        self.state_out = list(rnn_state)

        spy_inputs = [
            rnn_input,
            self.state_in,
            self.state_out,
            self.seq_lens,
        ]
        spy_fn = tf.py_func(spy, spy_inputs, tf.int64, stateful=True)

        # Output layers are built under a control dependency on the spy op.
        with tf.control_dependencies([spy_fn]):
            flat_out = tf.reshape(rnn_out, [-1, cell_size])
            logits = linear(flat_out, num_outputs, "action",
                            normc_initializer(0.01))
        return logits, flat_out