def __init__(self, obs_space, action_space, num_outputs, model_config,
                     name):
            """Set up the wrapper state around a legacy (v1) model class.

            All five arguments are forwarded unchanged to
            ``TFModelV2.__init__``. ``model_config`` is additionally read for
            ``"state_shape"``, ``"use_lstm"`` and ``"lstm_cell_size"`` in
            order to guess the initial recurrent state.
            """
            TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                               model_config, name)
            self.legacy_model_cls = legacy_model_cls

            # Holds the most recent v1 model built by forward(); none yet.
            self.cur_instance = None

            # XXX: The true state size of a V1 model is only known after
            # forward() has run, so the value picked here is a best guess
            # and may be wrong.
            declared_shapes = model_config.get("state_shape")
            if declared_shapes:
                guessed_state = [
                    np.zeros(shape, np.float32) for shape in declared_shapes
                ]
            elif model_config.get("use_lstm"):
                lstm_size = model_config.get("lstm_cell_size", 256)
                # Two zero vectors of the LSTM cell size.
                guessed_state = [
                    np.zeros(lstm_size, np.float32) for _ in range(2)
                ]
            else:
                guessed_state = []
            self.initial_state = guessed_state

            # Update ops are tracked here; nothing has been collected yet.
            self._update_ops = None

            # Remember the variable scope opened under this model's name.
            with tf.variable_scope(self.name) as model_scope:
                self.variable_scope = model_scope
Example #2
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Create one ``OnlineLinearRegression`` per output (arm).

        All five arguments are forwarded unchanged to
        ``TFModelV2.__init__``. ``model_config`` may carry ``"alpha"`` and
        ``"lambda_"`` (both default to 1), which are handed to every arm.
        The feature dimension is the number of elements in one sample drawn
        from ``obs_space``.
        """
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)

        # Hyper-parameters shared by every arm.
        regression_kwargs = {
            "alpha": model_config.get("alpha", 1),
            "lambda_": model_config.get("lambda_", 1),
        }
        self.feature_dim = obs_space.sample().size

        arm_count = self.num_outputs
        self.arms = [
            OnlineLinearRegression(feature_dim=self.feature_dim,
                                   **regression_kwargs)
            for _ in range(arm_count)
        ]

        # Both caches start empty.
        self._cur_value = None
        self._cur_ctx = None
Example #3
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Create a single ``OnlineLinearRegression`` over item features.

        All five arguments are forwarded unchanged to
        ``TFModelV2.__init__``. ``model_config`` may carry ``"alpha"``
        (default 1) and ``"lambda_"`` (default 0.1). The feature dimension
        is the last axis of the ``"item"`` entry of the original
        observation space.

        Raises:
            AssertionError: If ``obs_space.original_space`` is not a
                ``gym.spaces.Dict`` containing an ``"item"`` entry.
        """
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)

        regression_kwargs = {
            "alpha": model_config.get("alpha", 1),
            "lambda_": model_config.get("lambda_", 0.1),
        }

        # RLlib preprocessors flatten the observation space (and unflatten
        # it later), so the Dict structure is only visible on the original
        # space.
        raw_space = obs_space.original_space
        has_item_entry = (isinstance(raw_space, gym.spaces.Dict)
                          and "item" in raw_space.spaces)
        assert has_item_entry, (
            "This model only supports gym.spaces.Dict observation spaces.")

        self.feature_dim = raw_space["item"].shape[-1]
        self.arm = OnlineLinearRegression(feature_dim=self.feature_dim,
                                          **regression_kwargs)

        # Both caches start empty.
        self._cur_value = None
        self._cur_ctx = None
Example #4
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Initialize the model by delegating to ``TFModelV2.__init__``.

        All five arguments are forwarded unchanged; this constructor adds no
        state of its own.

        Here is an example implementation for a subclass
        ``MyRNNClass(RecurrentTFModelV2)``::

            def __init__(self, obs_space, action_space, num_outputs,
                         model_config, name):
                super(MyRNNClass, self).__init__(
                    obs_space, action_space, num_outputs, model_config,
                    name)
                cell_size = 256

                # Define input layers
                input_layer = tf.keras.layers.Input(
                    shape=(None, obs_space.shape[0]))
                state_in_h = tf.keras.layers.Input(shape=(cell_size, ))
                state_in_c = tf.keras.layers.Input(shape=(cell_size, ))
                seq_in = tf.keras.layers.Input(shape=())

                # Send to LSTM cell
                lstm_out, state_h, state_c = tf.keras.layers.LSTM(
                    cell_size, return_sequences=True, return_state=True,
                    name="lstm")(
                        inputs=input_layer,
                        mask=tf.sequence_mask(seq_in),
                        initial_state=[state_in_h, state_in_c])
                output_layer = tf.keras.layers.Dense(...)(lstm_out)

                # Create the RNN model
                self.rnn_model = tf.keras.Model(
                    inputs=[input_layer, seq_in, state_in_h, state_in_c],
                    outputs=[output_layer, state_h, state_c])
                self.register_variables(self.rnn_model.variables)
                self.rnn_model.summary()
        """
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)
Example #5
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the policy ("pi") and value ("vf") Keras networks.

        ``model_config`` is first merged over
        ``DEFAULT_STRATEGO_MODEL_CONFIG`` with ``with_base_config``. The
        constructor then builds two branches (conv layers, flatten, dense
        layers and, when ``use_lstm`` is set, an LSTM), adds a logits head
        and a value head, wraps everything in ``self.base_model`` and
        registers its variables.

        Args:
            obs_space: Observation space. Its ``original_space`` is indexed
                with the observation keys selected by ``observation_mode``.
            action_space: Forwarded to ``TFModelV2.__init__``.
            num_outputs: Width of the policy logits layer, and of the value
                head when ``custom_options["q_fn"]`` is truthy.
            model_config: Model options; see the keys read below.
            name: Forwarded to ``TFModelV2.__init__``.

        Raises:
            AssertionError: If ``observation_mode`` is not one of the three
                known modes, or if ``BOTH_OBSERVATIONS`` is combined with
                ``vf_share_layers``.
            NotImplementedError: If a saved policy model file is configured
                together with ``use_lstm``.
        """
        model_config = with_base_config(base_config=DEFAULT_STRATEGO_MODEL_CONFIG, extra_config=model_config)
        TFModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)

        print(model_config)

        custom_options = model_config['custom_options']

        # Select which entry of the original observation space feeds each
        # branch. Explicit raises are used instead of ``assert`` so the
        # checks still run under ``python -O``.
        observation_mode = custom_options['observation_mode']
        if observation_mode == PARTIALLY_OBSERVABLE:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'partial_observation'
        elif observation_mode == FULLY_OBSERVABLE:
            self.pi_obs_key = 'full_observation'
            self.vf_obs_key = 'full_observation'
        elif observation_mode == BOTH_OBSERVATIONS:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'full_observation'
            if model_config['vf_share_layers']:
                raise AssertionError(
                    "vf_share_layers must be disabled when observation_mode is BOTH_OBSERVATIONS")
        else:
            raise AssertionError(
                "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]")

        if model_config["custom_preprocessor"]:
            print(obs_space)

            self.preprocessor = ModelCatalog.get_preprocessor_for_space(observation_space=self.obs_space.original_space,
                                                                        options=model_config)
        else:
            self.preprocessor = None
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            logger.warning("No custom preprocessor for StrategoModel was specified.\n"
                           "Some tree search policies may not initialize their placeholders correctly without this.")

        self.use_lstm = model_config['use_lstm']
        self.lstm_cell_size = model_config['lstm_cell_size']
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.mask_invalid_actions = custom_options['mask_invalid_actions']

        conv_activation = get_activation_fn(model_config.get("conv_activation"))
        cnn_filters = model_config.get("conv_filters")
        fc_activation = get_activation_fn(model_config.get("fcnet_activation"))
        hiddens = model_config.get("fcnet_hiddens")

        if self.use_lstm:
            # Recurrent state inputs: (h, c) for the policy LSTM followed by
            # (h, c) for the value LSTM.
            state_in = [tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="pi_lstm_h"),
                        tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="pi_lstm_c"),
                        tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="vf_lstm_h"),
                        tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="vf_lstm_c")]

            seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")

            # The leading ``None`` adds a variable-length axis in front of
            # the per-step observation shape.
            self.pi_obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self.pi_obs_key].shape), name="pi_observation")

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self.vf_obs_key].shape), name="vf_observation")

        else:
            state_in, seq_lens_in = None, None

            self.pi_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.pi_obs_key].shape, name="pi_observation")

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.vf_obs_key].shape, name="vf_observation")

        if cnn_filters is None:

            # assuming board size will always remain the same for both pi and vf networks
            if self.use_lstm:
                single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[2:]
            else:
                single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[1:]
            cnn_filters = _get_filter_config(single_obs_input_shape)

        def maybe_td(layer):
            # In LSTM mode every layer is wrapped in TimeDistributed.
            if self.use_lstm:
                return tf.keras.layers.TimeDistributed(layer=layer)
            else:
                return layer

        def build_primary_layers(prefix: str, obs_in: tf.Tensor, state_in: tf.Tensor):
            # encapsulated in a function to either be called once for shared policy/vf or twice for separate policy/vf

            _last_layer = obs_in

            for i, (out_size, kernel, stride) in enumerate(cnn_filters):
                _last_layer = maybe_td(tf.keras.layers.Conv2D(
                    filters=out_size,
                    kernel_size=kernel,
                    strides=stride,
                    activation=conv_activation,
                    padding="same",
                    name="{}_conv_{}".format(prefix, i)))(_last_layer)

            _last_layer = maybe_td(tf.keras.layers.Flatten())(_last_layer)

            for i, size in enumerate(hiddens):
                _last_layer = maybe_td(tf.keras.layers.Dense(
                    size,
                    name="{}_fc_{}".format(prefix, i),
                    activation=fc_activation,
                    kernel_initializer=normc_initializer(1.0)))(_last_layer)

            if self.use_lstm:
                # With return_state=True the layer yields the sequence
                # output followed by the final states.
                _last_layer, *state_out = tf.keras.layers.LSTM(
                    units=self.lstm_cell_size,
                    return_sequences=True,
                    return_state=True,
                    name="{}_lstm".format(prefix))(
                    inputs=_last_layer,
                    mask=tf.sequence_mask(seq_lens_in),
                    initial_state=state_in)
            else:
                state_out = None

            return _last_layer, state_out

        if self.use_lstm:
            pi_state_in = state_in[:2]
            vf_state_in = state_in[2:]
        else:
            pi_state_in, vf_state_in = None, None

        # Optional path to a saved Keras model used as the policy branch.
        policy_file_path = custom_options.get('policy_keras_model_file_path')
        if policy_file_path is not None:
            if self.use_lstm:
                raise NotImplementedError

            pi_state_out = None
            self._pi_model = load_model(filepath=policy_file_path, compile=False)

            # Prefix the loaded layers' names with "pi_" and give the last
            # layer the same name as the freshly built logits head below.
            for layer in self._pi_model.layers:
                layer._name = "pi_" + layer.name
            self._pi_model.layers[-1]._name = 'pi_unmasked_logits'

            self.unmasked_logits_out = self._pi_model(self.pi_obs_inputs)

        else:
            self._pi_model = None
            pi_last_layer, pi_state_out = build_primary_layers(prefix="pi", obs_in=self.pi_obs_inputs,
                                                               state_in=pi_state_in)

            self.unmasked_logits_out = maybe_td(tf.keras.layers.Dense(
                num_outputs,
                name="pi_unmasked_logits",
                activation=None,
                kernel_initializer=normc_initializer(0.01)))(pi_last_layer)

        vf_last_layer, vf_state_out = build_primary_layers(prefix="vf", obs_in=self.vf_obs_inputs,
                                                           state_in=vf_state_in)

        if self.use_lstm:
            state_out = [*pi_state_out, *vf_state_out]
        else:
            state_out = None

        self._use_q_fn = custom_options['q_fn']

        # With q_fn the value head has one output per action; otherwise it
        # has a single output.
        if self._use_q_fn:
            value_out_size = num_outputs
        else:
            value_out_size = 1

        value_out = maybe_td(tf.keras.layers.Dense(
            value_out_size,
            name="vf_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01)))(vf_last_layer)

        model_inputs = [self.pi_obs_inputs, self.vf_obs_inputs]
        model_outputs = [self.unmasked_logits_out, value_out]
        if self.use_lstm:
            model_inputs += [seq_lens_in, *state_in]
            model_outputs += state_out

        self.base_model = tf.keras.Model(inputs=model_inputs, outputs=model_outputs)

        # summary() prints the table itself and returns None, so it must not
        # be wrapped in print().
        self.base_model.summary()

        self.register_variables(self.base_model.variables)
Example #6
0
    def __init__(self,
                 obs_space=None,
                 action_space=None,
                 num_outputs=35,
                 model_config=None,
                 name='my_model'):
        """Build a path-context attention network with policy and value heads.

        The constructor initialises both base classes (``TFModelV2`` and
        ``Code2VecModelBase``) and then assembles ``self.base_model``: three
        embedded index inputs are concatenated per context, passed through
        dropout and a shared tanh dense layer, pooled by ``AttentionLayer``
        and fed to a softmax head of width ``num_outputs`` and a scalar
        value head.

        Args:
            obs_space: Forwarded to ``TFModelV2.__init__``.
            action_space: Forwarded to ``TFModelV2.__init__``.
            num_outputs: Width of the softmax ``target_index`` head.
            model_config: Forwarded to ``TFModelV2.__init__``. ``None`` (the
                default) is replaced by a fresh empty dict so instances
                never share one mutable default.
            name: Forwarded to ``TFModelV2.__init__``.
        """
        if model_config is None:
            model_config = {}

        self.base_model: Optional[keras.Model] = None
        self.keras_eval_model: Optional[keras.Model] = None
        self.keras_model_predict_function: Optional[
            K.GraphExecutionFunction] = None
        self.training_status: ModelTrainingStatus = ModelTrainingStatus()
        self._checkpoint: Optional[tf.train.Checkpoint] = None
        self._checkpoint_manager: Optional[tf.train.CheckpointManager] = None
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)
        config = Config(set_defaults=True, load_from_args=True, verify=True)
        # NOTE(review): self.config and self.vocabs used below are presumably
        # set by Code2VecModelBase.__init__ - confirm against that class.
        Code2VecModelBase.__init__(self, config)

        # TensorFlow is imported inside the constructor, as in the original.
        import tensorflow as tf
        from tensorflow import keras
        from tensorflow.keras.layers import Input, Embedding, Concatenate, Dropout, TimeDistributed, Dense
        import tensorflow.keras.backend as K

        # Each input sample consists of a bag of `MAX_CONTEXTS` tuples (source_terminal, path, target_terminal).
        # The valid mask indicates for each context whether it actually exists or it is just a padding.
        path_source_token_input = Input((self.config.MAX_CONTEXTS, ),
                                        dtype=tf.int32)
        path_input = Input((self.config.MAX_CONTEXTS, ), dtype=tf.int32)
        path_target_token_input = Input((self.config.MAX_CONTEXTS, ),
                                        dtype=tf.int32)
        context_valid_mask = Input((self.config.MAX_CONTEXTS, ))

        # Input paths are indexes, we embed these here.
        paths_embedded = Embedding(self.vocabs.path_vocab.size,
                                   self.config.PATH_EMBEDDINGS_SIZE,
                                   name='path_embedding')(path_input)

        # Input terminals are indexes, we embed these here. One embedding
        # layer is shared by the source and target terminals.
        token_embedding_shared_layer = Embedding(
            self.vocabs.token_vocab.size,
            self.config.TOKEN_EMBEDDINGS_SIZE,
            name='token_embedding')
        path_source_token_embedded = token_embedding_shared_layer(
            path_source_token_input)
        path_target_token_embedded = token_embedding_shared_layer(
            path_target_token_input)

        # `Context` is a concatenation of the 2 terminals & path embedding.
        context_embedded = Concatenate()([
            path_source_token_embedded, paths_embedded,
            path_target_token_embedded
        ])
        # Dropout takes the fraction to drop, hence 1 - keep rate.
        context_embedded = Dropout(1 - self.config.DROPOUT_KEEP_RATE)(
            context_embedded)

        # Apply a dense layer for each context vector (using the same weights for all of the contexts).
        context_after_dense = TimeDistributed(
            Dense(self.config.CODE_VECTOR_SIZE,
                  use_bias=False,
                  activation='tanh'))(context_embedded)

        # The final code vectors are received by applying attention to the "densed" context vectors.
        code_vectors, attention_weights = AttentionLayer(name='attention')(
            [context_after_dense, context_valid_mask])

        # "Decode": the output head is sized by num_outputs rather than by
        # the target vocabulary.
        target_index = Dense(num_outputs,
                             use_bias=False,
                             activation='softmax',
                             name='target_index')(code_vectors)
        value_out = Dense(1, activation=None, name='value_out')(code_vectors)

        # Wrap the layers into a Keras model with two outputs.
        inputs = [
            path_source_token_input, path_input, path_target_token_input,
            context_valid_mask
        ]
        self.base_model = keras.Model(inputs=inputs,
                                      outputs=[target_index, value_out])
        self.register_variables(self.base_model.variables)
Example #7
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the convolutional policy ("pi") and value ("vf") networks.

        ``model_config`` is first merged over
        ``DEFAULT_STRATEGO_MODEL_CONFIG`` with ``with_base_config``. Each
        branch is a stack of ``Conv2D`` layers, followed by one
        ``ConvLSTM2D`` layer when ``use_lstm`` is set and ``fake_lstm`` is
        not. The policy head is a convolution whose filter count is
        ``int(action_space.n / (rows * columns))``. The value head is a
        convolution of the same size when ``custom_options["q_fn"]`` is
        truthy, otherwise a flattened dense layer with one unit.

        Args:
            obs_space: Observation space. Its ``original_space`` is indexed
                with the observation keys selected by ``observation_mode``;
                the first two dimensions of the policy observation are used
                as ``rows`` and ``columns``.
            action_space: Must expose ``n``.
            num_outputs: Forwarded to ``TFModelV2.__init__``.
            model_config: Model options; see the keys read below.
            name: Forwarded to ``TFModelV2.__init__``.

        Raises:
            AssertionError: If ``observation_mode`` is not one of the three
                known modes, or if ``BOTH_OBSERVATIONS`` is combined with
                ``vf_share_layers``.
            NotImplementedError: If more than one entry is given in
                ``custom_options["lstm_filters"]`` while a real LSTM is used.
        """
        model_config = with_base_config(
            base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
            extra_config=model_config)
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)

        print(model_config)

        custom_options = model_config['custom_options']

        # Select which entry of the original observation space feeds each
        # branch. Explicit raises are used instead of ``assert`` so the
        # checks still run under ``python -O``.
        observation_mode = custom_options['observation_mode']
        if observation_mode == PARTIALLY_OBSERVABLE:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'partial_observation'
        elif observation_mode == FULLY_OBSERVABLE:
            self.pi_obs_key = 'full_observation'
            self.vf_obs_key = 'full_observation'
        elif observation_mode == BOTH_OBSERVATIONS:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'full_observation'
            if model_config['vf_share_layers']:
                raise AssertionError(
                    "vf_share_layers must be disabled when observation_mode is BOTH_OBSERVATIONS"
                )
        else:
            raise AssertionError(
                "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"
            )

        if model_config["custom_preprocessor"]:
            print(obs_space)

            self.preprocessor = ModelCatalog.get_preprocessor_for_space(
                observation_space=self.obs_space.original_space,
                options=model_config)
        else:
            self.preprocessor = None
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            logger.warning(
                "No custom preprocessor for StrategoModel was specified.\n"
                "Some tree search policies may not initialize their placeholders correctly without this."
            )

        self.use_lstm = model_config['use_lstm']
        self.fake_lstm = custom_options.get('fake_lstm')
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.mask_invalid_actions = custom_options['mask_invalid_actions']

        conv_activation = get_activation_fn(
            model_config.get("conv_activation"))
        lstm_filters = custom_options['lstm_filters']
        # NOTE: no default filter config is derived here; a missing
        # "conv_filters" entry makes the enumerate() below fail.
        cnn_filters = model_config.get("conv_filters")
        final_pi_filter_amt = custom_options["final_pi_filter_amt"]

        rows = obs_space.original_space[self.pi_obs_key].shape[0]
        columns = obs_space.original_space[self.pi_obs_key].shape[1]

        if self.use_lstm:
            if self.fake_lstm:
                # No recurrent layer is built in this mode; the state is a
                # one-element placeholder passed straight through.
                self._lstm_state_shape = (1, )
            else:
                self._lstm_state_shape = (rows, columns, lstm_filters[0][0])

            # Recurrent state inputs: (h, c) for the policy branch followed
            # by (h, c) for the value branch.
            state_in = [
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="pi_lstm_h"),
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="pi_lstm_c"),
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="vf_lstm_h"),
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="vf_lstm_c")
            ]

            seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")

            # The leading ``None`` adds a variable-length axis in front of
            # the per-step observation shape.
            self.pi_obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self.pi_obs_key].shape),
                name="pi_observation")

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self.vf_obs_key].shape),
                name="vf_observation")

        else:
            state_in, seq_lens_in = None, None

            self.pi_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.pi_obs_key].shape,
                name="pi_observation")

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.vf_obs_key].shape,
                name="vf_observation")

        def maybe_td(layer):
            # In LSTM mode every layer is wrapped in TimeDistributed.
            if self.use_lstm:
                return tf.keras.layers.TimeDistributed(layer=layer,
                                                       name=f"td_{layer.name}")
            else:
                return layer

        def build_primary_layers(prefix: str, obs_in: tf.Tensor,
                                 state_in: tf.Tensor):
            # encapsulated in a function to either be called once for shared policy/vf or twice for separate policy/vf

            _last_layer = obs_in

            for i, (out_size, kernel, stride) in enumerate(cnn_filters):
                _last_layer = maybe_td(
                    tf.keras.layers.Conv2D(filters=out_size,
                                           kernel_size=kernel,
                                           strides=stride,
                                           activation=conv_activation,
                                           padding="same",
                                           name="{}_conv_{}".format(
                                               prefix, i)))(_last_layer)

            # Without a real recurrent layer the incoming state is returned
            # unchanged.
            state_out = state_in
            if self.use_lstm and not self.fake_lstm:
                for i, (out_size, kernel, stride) in enumerate(lstm_filters):
                    if i > 0:
                        raise NotImplementedError(
                            "Only single lstm layers are implemented right now"
                        )

                    # With return_state=True the layer yields the sequence
                    # output followed by the final states.
                    _last_layer, *state_out = tf.keras.layers.ConvLSTM2D(
                        filters=out_size,
                        kernel_size=kernel,
                        strides=stride,
                        activation=conv_activation,
                        padding="same",
                        return_sequences=True,
                        return_state=True,
                        name="{}_convlstm".format(prefix))(
                            inputs=_last_layer,
                            mask=tf.sequence_mask(seq_lens_in),
                            initial_state=state_in)

            return _last_layer, state_out

        if self.use_lstm:
            pi_state_in = state_in[:2]
            vf_state_in = state_in[2:]
        else:
            pi_state_in, vf_state_in = None, None

        pi_last_layer, pi_state_out = build_primary_layers(
            prefix="pi", obs_in=self.pi_obs_inputs, state_in=pi_state_in)

        vf_last_layer, vf_state_out = build_primary_layers(
            prefix="vf", obs_in=self.vf_obs_inputs, state_in=vf_state_in)

        if self.use_lstm:
            state_out = [*pi_state_out, *vf_state_out]
        else:
            state_out = None

        pi_last_layer = maybe_td(
            tf.keras.layers.Conv2D(filters=final_pi_filter_amt,
                                   kernel_size=[3, 3],
                                   strides=1,
                                   activation=conv_activation,
                                   padding="same",
                                   name="pi_conv_last"))(pi_last_layer)

        # Filter count of the per-cell output heads; computed once and
        # reused for the debug print, the logits head and the Q head.
        action_filters = int(action_space.n / (rows * columns))

        print(
            f"action space n: {action_space.n}, rows: {rows}, columns: {columns}, filters: {action_filters}"
        )

        unmasked_logits_out = maybe_td(
            tf.keras.layers.Conv2D(
                filters=action_filters,
                kernel_size=[3, 3],
                strides=1,
                activation=None,
                padding="same",
                name="pi_conv_unmasked_logits"))(pi_last_layer)

        self._use_q_fn = custom_options['q_fn']

        if self._use_q_fn:
            # Q-function head: same layout as the policy head.
            vf_last_layer = maybe_td(
                tf.keras.layers.Conv2D(filters=final_pi_filter_amt,
                                       kernel_size=[3, 3],
                                       strides=1,
                                       activation=conv_activation,
                                       padding="same",
                                       name="vf_conv_last"))(vf_last_layer)

            value_out = maybe_td(
                tf.keras.layers.Conv2D(
                    filters=action_filters,
                    kernel_size=[3, 3],
                    strides=1,
                    activation=None,
                    padding="same",
                    name="vf_conv_q_out"))(vf_last_layer)
        else:
            # State-value head: 1x1 conv down to one channel, flatten, then
            # a single dense unit.
            vf_last_layer = maybe_td(
                tf.keras.layers.Conv2D(filters=1,
                                       kernel_size=[1, 1],
                                       strides=1,
                                       activation=conv_activation,
                                       padding="same",
                                       name="vf_conv_last"))(vf_last_layer)

            vf_last_layer = maybe_td(
                tf.keras.layers.Flatten(name="vf_flatten"))(vf_last_layer)

            value_out = maybe_td(
                tf.keras.layers.Dense(
                    units=1,
                    name="vf_out",
                    activation=None,
                    kernel_initializer=normc_initializer(0.01)))(vf_last_layer)

        model_inputs = [self.pi_obs_inputs, self.vf_obs_inputs]
        model_outputs = [unmasked_logits_out, value_out]

        if self.use_lstm:
            model_inputs += [seq_lens_in, *state_in]
            model_outputs += state_out

        self.base_model = tf.keras.Model(inputs=model_inputs,
                                         outputs=model_outputs)

        # summary() prints the table itself and returns None, so it must not
        # be wrapped in print().
        self.base_model.summary()

        self.register_variables(self.base_model.variables)
Example #8
0
        def __init__(self, obs_space, action_space, num_outputs, model_config,
                     name):
            """Wrap a legacy (v1) model class behind the TFModelV2 interface.

            All five arguments are forwarded unchanged to
            ``TFModelV2.__init__``. Two TF templates are created: one that
            instantiates the legacy model and one that builds the value
            function. ``model_config`` is additionally read for
            ``"vf_share_layers"``, ``"use_lstm"``, ``"state_shape"`` and
            ``"lstm_cell_size"``.
            """
            TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                               model_config, name)
            self.legacy_model_cls = legacy_model_cls

            def instance_template(input_dict, state, seq_lens):
                # Build a new legacy model instance under this model's name.
                with tf.variable_scope(self.name):
                    return self.legacy_model_cls(
                        input_dict, obs_space, action_space, num_outputs,
                        model_config, state, seq_lens)

            self.instance_template = tf.make_template("instance_template",
                                                      instance_template)
            # Holds the most recent v1 model built by forward(); none yet.
            self.cur_instance = None

            def vf_template(last_layer, input_dict):
                with tf.variable_scope(self.variable_scope), \
                        tf.variable_scope("value_function"):
                    if model_config["vf_share_layers"]:
                        # Simple case: a linear head on the shared features.
                        shared_value = linear(last_layer, 1,
                                              "value_function",
                                              normc_initializer(1.0))
                        return tf.reshape(shared_value, [-1])

                    # Otherwise build a separate single-output model that
                    # carries no RNN state.
                    separate_config = model_config.copy()
                    separate_config["free_log_std"] = False
                    if separate_config["use_lstm"]:
                        separate_config["use_lstm"] = False
                        logger.warning(
                            "It is not recommended to use a LSTM model "
                            "with vf_share_layers=False (consider "
                            "setting it to True). If you want to not "
                            "share layers, you can implement a custom "
                            "LSTM model that overrides the "
                            "value_function() method.")
                    value_branch = legacy_model_cls(
                        input_dict,
                        obs_space,
                        action_space,
                        1,
                        separate_config,
                        state_in=None,
                        seq_lens=None)
                    return tf.reshape(value_branch.outputs, [-1])

            self.vf_template = tf.make_template("vf_template", vf_template)

            # XXX: The true state size of a V1 model is only known after
            # forward() has run, so the value picked here is a best guess
            # and may be wrong.
            declared_shapes = model_config.get("state_shape")
            if declared_shapes:
                guessed_state = [
                    np.zeros(shape, np.float32) for shape in declared_shapes
                ]
            elif model_config.get("use_lstm"):
                lstm_size = model_config.get("lstm_cell_size", 256)
                # Two zero vectors of the LSTM cell size.
                guessed_state = [
                    np.zeros(lstm_size, np.float32) for _ in range(2)
                ]
            else:
                guessed_state = []
            self.initial_state = guessed_state

            # Remember the variable scope opened under this model's name;
            # vf_template reads it when it is eventually called.
            with tf.variable_scope(self.name) as model_scope:
                self.variable_scope = model_scope
Example #9
0
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name):
     """Initialize the model by delegating to ``TFModelV2.__init__``.

     All five arguments are forwarded unchanged and in the same order; no
     additional state is set up here.
     """
     TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                        model_config, name)
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the shared base network and the policy / Q1 / Q2 heads.

        ``model_config`` is first passed through ``with_base_config`` together
        with ``DEFAULT_STRATEGO_MODEL_CONFIG``. Four Keras models are created:

        * ``self._base_model``: observation input (plus sequence lengths and
          the two LSTM state inputs when ``use_lstm`` is set) to the base
          features (plus the resulting state when ``use_lstm`` is set).
        * ``self.pi_model``, ``self.q1_model``, ``self.q2_model``: heads that
          all read from the standalone ``self._base_model_out`` input.

        All model variables and a scalar ``log_alpha`` variable are registered
        through ``self.register_variables``.

        Args:
            obs_space: Observation space; ``obs_space.original_space`` is
                indexed with the observation key selected by
                ``observation_mode``.
            action_space: Action space; ``action_space.n`` is read to size
                the head outputs.
            num_outputs: Forwarded to ``TFModelV2.__init__``.
            model_config: Model config. Reads ``use_lstm`` and
                ``conv_activation`` plus the ``custom_options`` keys
                ``observation_mode``, ``fake_lstm`` (optional, default
                False), ``mask_invalid_actions``, ``base_lstm_filters``,
                ``base_cnn_filters``, ``pi_cnn_filters`` and
                ``q_cnn_filters``. Each filter entry is unpacked as
                ``(out_size, kernel, stride)``.
            name: Model name; also used as prefix of the Keras model names.

        Raises:
            NotImplementedError: If ``observation_mode`` is
                ``BOTH_OBSERVATIONS`` or more than one ConvLSTM layer is
                configured while a real LSTM is in use.
            AssertionError: If ``observation_mode`` is not a known mode.
        """
        model_config = with_base_config(
            base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
            extra_config=model_config)
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)

        print(model_config)

        observation_mode = model_config['custom_options']['observation_mode']
        if observation_mode == PARTIALLY_OBSERVABLE:
            self._obs_key = 'partial_observation'
        elif observation_mode == FULLY_OBSERVABLE:
            self._obs_key = 'full_observation'
        elif observation_mode == BOTH_OBSERVATIONS:
            raise NotImplementedError
        else:
            # Raised explicitly instead of ``assert False`` so the check is
            # not stripped under ``python -O`` (which would leave
            # ``self._obs_key`` unset).
            raise AssertionError(
                "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"
            )

        self._action_dist_class, self._logit_dim = ModelCatalog.get_action_dist(
            self.action_space, model_config)

        self.use_lstm = model_config['use_lstm']
        self.fake_lstm = model_config['custom_options'].get('fake_lstm', False)

        self.mask_invalid_actions = model_config['custom_options'][
            'mask_invalid_actions']

        conv_activation = get_activation_fn(
            model_config.get("conv_activation"))
        base_lstm_filters = model_config["custom_options"]['base_lstm_filters']
        base_cnn_filters = model_config["custom_options"]['base_cnn_filters']
        pi_cnn_filters = model_config["custom_options"]['pi_cnn_filters']
        q_cnn_filters = model_config["custom_options"]['q_cnn_filters']

        obs_shape = obs_space.original_space[self._obs_key].shape
        rows, columns = obs_shape[0], obs_shape[1]

        # Output channels of every head: the flat action count divided by the
        # number of board cells. Integer floor division avoids the float
        # round trip of ``int(n / cells)``.
        head_filters = int(action_space.n // (rows * columns))

        if self.use_lstm:
            self._lstm_state_shape = (rows, columns, base_lstm_filters[0][0])

        # The base output channel count comes from the ConvLSTM layer when a
        # real LSTM is used, otherwise from the last plain conv layer.
        if self.use_lstm and not self.fake_lstm:
            base_out_channels = base_lstm_filters[0][0]
        else:
            base_out_channels = base_cnn_filters[-1][0]
        self._base_model_out_shape = (rows, columns, base_out_channels)

        if self.use_lstm:
            state_in = [
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="base_lstm_h"),
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="base_lstm_c")
            ]
            seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")
            # Extra leading variable-length axis, consumed by the
            # TimeDistributed / ConvLSTM2D layers below.
            obs_input_shape = (None, *obs_shape)
        else:
            state_in, seq_lens_in = None, None
            obs_input_shape = obs_shape

        self._obs_inputs = tf.keras.layers.Input(shape=obs_input_shape,
                                                 name="observation")
        # The heads are separate models fed from this standalone input, not
        # wired directly to the base model's output tensor.
        self._base_model_out = tf.keras.layers.Input(
            shape=self._base_model_out_shape, name="model_out")

        def maybe_td(layer):
            """Wrap ``layer`` in ``TimeDistributed`` when ``use_lstm`` is set."""
            if self.use_lstm:
                return tf.keras.layers.TimeDistributed(layer=layer,
                                                       name=f"td_{layer.name}")
            return layer

        def build_shared_base_layers(prefix: str, obs_in: tf.Tensor,
                                     state_in: tf.Tensor):
            """Stack the base conv layers and, if enabled, one ConvLSTM2D.

            Returns ``(features, state_out)``; ``state_out`` is ``state_in``
            unchanged unless a real (non-fake) LSTM layer is applied.
            """
            _last_layer = obs_in

            for i, (out_size, kernel, stride) in enumerate(base_cnn_filters):
                _last_layer = maybe_td(
                    tf.keras.layers.Conv2D(filters=out_size,
                                           kernel_size=kernel,
                                           strides=stride,
                                           activation=conv_activation,
                                           padding="same",
                                           name="{}_conv_{}".format(
                                               prefix, i)))(_last_layer)

            base_state_out = state_in
            if self.use_lstm and not self.fake_lstm:
                for i, (out_size, kernel,
                        stride) in enumerate(base_lstm_filters):
                    if i > 0:
                        raise NotImplementedError(
                            "Only single lstm layers are implemented right now"
                        )

                    _last_layer, *base_state_out = tf.keras.layers.ConvLSTM2D(
                        filters=out_size,
                        kernel_size=kernel,
                        strides=stride,
                        activation=conv_activation,
                        padding="same",
                        data_format='channels_last',
                        return_sequences=True,
                        return_state=True,
                        name="{}_convlstm".format(prefix))(
                            inputs=_last_layer,
                            initial_state=state_in,
                            mask=tf.sequence_mask(seq_lens_in))

            return _last_layer, base_state_out

        def build_pi_layers(input_layer):
            """Build the policy head and return its unmasked logits tensor."""
            _last_layer = input_layer
            for i, (out_size, kernel, stride) in enumerate(pi_cnn_filters):
                _last_layer = tf.keras.layers.Conv2D(
                    filters=out_size,
                    kernel_size=kernel,
                    strides=stride,
                    activation=conv_activation,
                    padding="same",
                    name="{}_conv_{}".format('pi', i))(_last_layer)

            print(
                f"action space n: {action_space.n}, rows: {rows}, columns: {columns}, filters: {head_filters}"
            )

            unmasked_logits = tf.keras.layers.Conv2D(
                filters=head_filters,
                kernel_size=[3, 3],
                strides=1,
                activation=None,
                padding="same",
                name="{}_conv_{}".format('pi', "unmasked_logits"))(_last_layer)
            return unmasked_logits

        def build_q_layers(input_layer, prefix):
            """Build one Q head named with ``prefix`` and return its output."""
            _last_layer = input_layer
            for i, (out_size, kernel, stride) in enumerate(q_cnn_filters):
                _last_layer = tf.keras.layers.Conv2D(
                    filters=out_size,
                    kernel_size=kernel,
                    strides=stride,
                    activation=conv_activation,
                    padding="same",
                    name="{}_conv_{}".format(prefix, i))(_last_layer)

            q_val = tf.keras.layers.Conv2D(
                filters=head_filters,
                kernel_size=[3, 3],
                strides=1,
                activation=None,
                padding="same",
                name="{}_conv_{}".format(prefix, "q_out"))(_last_layer)
            return q_val

        base_model_out, state_out = build_shared_base_layers(
            prefix="shared_base", obs_in=self._obs_inputs, state_in=state_in)
        pi_unmasked_logits_out = build_pi_layers(
            input_layer=self._base_model_out)
        q1_out = build_q_layers(input_layer=self._base_model_out, prefix="q1")
        q2_out = build_q_layers(input_layer=self._base_model_out, prefix="q2")

        base_inputs = [self._obs_inputs]
        base_outputs = [base_model_out]
        if self.use_lstm:
            base_inputs += [seq_lens_in, *state_in]
            base_outputs += [*state_out]

        self._base_model = tf.keras.Model(name=f"{name}_base",
                                          inputs=base_inputs,
                                          outputs=base_outputs)

        self.pi_model = tf.keras.Model(name=f"{name}_pi_head",
                                       inputs=[self._base_model_out],
                                       outputs=[pi_unmasked_logits_out])
        self.q1_model = tf.keras.Model(name=f"{name}_q1_head",
                                       inputs=[self._base_model_out],
                                       outputs=[q1_out])
        self.q2_model = tf.keras.Model(name=f"{name}_q2_head",
                                       inputs=[self._base_model_out],
                                       outputs=[q2_out])

        # ``Model.summary()`` prints the table itself and returns ``None``;
        # wrapping it in ``print`` only emitted an extra "None" line.
        self._base_model.summary()
        self.pi_model.summary()
        self.q1_model.summary()
        self.q2_model.summary()

        self.register_variables(self._base_model.variables)
        self.register_variables(self.pi_model.variables)
        self.register_variables(self.q1_model.variables)
        self.register_variables(self.q2_model.variables)

        self.log_alpha = tf.Variable(0.0, dtype=tf.float32, name="log_alpha")
        self.alpha = tf.exp(self.log_alpha)
        self.register_variables([self.log_alpha])
Example #11
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, twin_q):
        """Build fully connected policy and Q models (optionally a twin Q).

        ``model_config`` is first passed through ``with_base_config`` together
        with ``DEFAULT_STRATEGO_MODEL_CONFIG``. The constructor creates
        ``self.pi_model`` and ``self.main_q_model`` (and ``self.twin_q_model``
        when ``twin_q`` is true), each a stack of ``Dense`` layers sized by
        ``model_config['fcnet_hiddens']`` followed by a linear ``Dense`` layer
        with ``action_space.n`` units. All model variables and a scalar
        ``log_alpha`` variable are registered via ``self.register_variables``.

        Args:
            obs_space: Observation space; ``obs_space.original_space`` is
                indexed with the policy / value observation keys.
            action_space: Action space; ``action_space.n`` sizes the outputs.
            num_outputs: Forwarded to ``TFModelV2.__init__``.
            model_config: Model config. Reads ``custom_preprocessor``,
                ``use_lstm``, ``vf_share_layers``, ``sac_alpha``,
                ``conv_activation``, ``fcnet_hiddens`` and the
                ``custom_options`` keys ``observation_mode``, ``fake_lstm``
                (optional), ``mask_invalid_actions`` and ``q_fn``.
            name: Forwarded to ``TFModelV2.__init__``.
            twin_q: Whether to build a second, independent Q model.

        Raises:
            NotImplementedError: If ``model_config['use_lstm']`` is truthy.
            AssertionError: If ``observation_mode`` is unknown, if
                ``BOTH_OBSERVATIONS`` is combined with ``vf_share_layers``,
                or if ``twin_q`` is requested without ``q_fn``.
        """
        model_config = with_base_config(
            base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
            extra_config=model_config)
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)

        print(model_config)

        observation_mode = model_config['custom_options']['observation_mode']
        if observation_mode == PARTIALLY_OBSERVABLE:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'partial_observation'
        elif observation_mode == FULLY_OBSERVABLE:
            self.pi_obs_key = 'full_observation'
            self.vf_obs_key = 'full_observation'
        elif observation_mode == BOTH_OBSERVATIONS:
            # Policy and value function read different observations here, so
            # shared layers are rejected.
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'full_observation'
            assert not model_config['vf_share_layers']
        else:
            # Raised explicitly instead of ``assert False`` so the check is
            # not stripped under ``python -O`` (which would leave the
            # observation keys unset).
            raise AssertionError(
                "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"
            )

        if model_config["custom_preprocessor"]:
            print(obs_space)

            self.preprocessor = ModelCatalog.get_preprocessor_for_space(
                observation_space=self.obs_space.original_space,
                options=model_config)
        else:
            self.preprocessor = None
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            logger.warning(
                "No custom preprocessor for StrategoModel was specified.\n"
                "Some tree search policies may not initialize their placeholders correctly without this."
            )

        self.use_lstm = model_config['use_lstm']
        if self.use_lstm:
            raise NotImplementedError
        # From here on ``self.use_lstm`` is falsy, so no recurrent state
        # inputs or outputs are created.

        self.fake_lstm = model_config['custom_options'].get('fake_lstm', False)
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.mask_invalid_actions = model_config['custom_options'][
            'mask_invalid_actions']
        self._use_q_fn = model_config['custom_options']['q_fn']

        self.twin_q = twin_q
        # A twin Q model is only accepted when the Q-function option is on.
        assert not (not self._use_q_fn and self.twin_q)
        self._sac_alpha = model_config.get("sac_alpha", False)

        conv_activation = get_activation_fn(
            model_config.get("conv_activation"))

        self.pi_obs_inputs = tf.keras.layers.Input(
            shape=obs_space.original_space[self.pi_obs_key].shape,
            name="pi_observation")

        self.vf_obs_inputs = tf.keras.layers.Input(
            shape=obs_space.original_space[self.vf_obs_key].shape,
            name="vf_observation")

        def maybe_td(layer):
            """Wrap ``layer`` in ``TimeDistributed`` when ``use_lstm`` is set.

            LSTM use is rejected above, so this currently always returns
            ``layer`` unchanged.
            """
            if self.use_lstm:
                return tf.keras.layers.TimeDistributed(layer=layer,
                                                       name=f"td_{layer.name}")
            return layer

        def build_primary_layers(prefix: str, obs_in: tf.Tensor):
            """Stack one ``Dense`` layer per ``fcnet_hiddens`` entry on ``obs_in``.

            Kept as a helper because it is called once per network (policy,
            main Q and, optionally, twin Q) with a different name prefix.
            """
            _last_layer = obs_in
            for i, size in enumerate(model_config['fcnet_hiddens']):
                _last_layer = maybe_td(
                    tf.keras.layers.Dense(size,
                                          name="{}_fc_{}".format(prefix, i),
                                          activation=conv_activation,
                                          kernel_initializer=normc_initializer(
                                              1.0)))(_last_layer)
            return _last_layer

        self.main_vf_prefix = "main_vf" if self.twin_q else "vf"

        pi_last_layer = build_primary_layers(prefix="pi",
                                             obs_in=self.pi_obs_inputs)
        vf_last_layer = build_primary_layers(prefix=self.main_vf_prefix,
                                             obs_in=self.vf_obs_inputs)
        if self.twin_q:
            twin_vf_last_layer = build_primary_layers(
                prefix="twin_vf", obs_in=self.vf_obs_inputs)

        # NOTE: ``maybe_td`` must wrap the *layer* and the result is then
        # applied to the tensor. The previous form passed the layer's output
        # tensor to ``maybe_td``, which only worked because it is a no-op
        # without LSTM.
        unmasked_logits_out = maybe_td(
            tf.keras.layers.Dense(
                action_space.n,
                name="{}_fc_{}".format('pi', 'unmasked_logits'),
                activation=None,
                kernel_initializer=normc_initializer(1.0)))(pi_last_layer)

        value_out = maybe_td(
            tf.keras.layers.Dense(
                action_space.n,
                name="{}_fc_{}".format(self.main_vf_prefix, 'q_out'),
                activation=None,
                kernel_initializer=normc_initializer(1.0)))(vf_last_layer)

        if self.twin_q:
            twin_value_out = maybe_td(
                tf.keras.layers.Dense(
                    action_space.n,
                    name="{}_fc_{}".format('twin_vf', 'q_out'),
                    activation=None,
                    kernel_initializer=normc_initializer(
                        1.0)))(twin_vf_last_layer)

        self.pi_model = tf.keras.Model(inputs=[self.pi_obs_inputs],
                                       outputs=[unmasked_logits_out])
        self.main_q_model = tf.keras.Model(inputs=[self.vf_obs_inputs],
                                           outputs=[value_out])

        # ``Model.summary()`` prints the table itself and returns ``None``;
        # wrapping it in ``print`` only emitted an extra "None" line.
        if self.twin_q:
            self.twin_q_model = tf.keras.Model(inputs=[self.vf_obs_inputs],
                                               outputs=[twin_value_out])
            self.twin_q_model.summary()
            self.register_variables(self.twin_q_model.variables)

        self.pi_model.summary()
        self.main_q_model.summary()

        self.register_variables(self.pi_model.variables)
        self.register_variables(self.main_q_model.variables)

        self.log_alpha = tf.Variable(0.0, dtype=tf.float32, name="log_alpha")
        self.alpha = tf.exp(self.log_alpha)
        self.register_variables([self.log_alpha])
Example #12
0
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 q_hiddens=None,
                 dueling=False,
                 num_atoms=1,
                 use_noisy=False,
                 v_min=-10.0,
                 v_max=10.0,
                 sigma0=0.5,
                 parameter_noise=False):
        """Build a convolutional Q-network over the board observation.

        ``model_config`` is first passed through ``with_base_config`` together
        with ``DEFAULT_STRATEGO_MODEL_CONFIG``. The network is a stack of
        ``Conv2D`` layers from ``model_config['conv_filters']``, an extra
        ``Conv2D`` with ``final_pi_filter_amt`` filters, and a linear
        ``Conv2D`` producing ``action_space.n // (rows * columns)`` channels.
        The result is stored as ``self.base_model`` and its variables are
        registered via ``self.register_variables``.

        Args:
            obs_space: Observation space; ``obs_space.original_space`` is
                indexed with the selected observation key.
            action_space: Action space; ``action_space.n`` sizes the output.
            num_outputs: Forwarded to ``TFModelV2.__init__``.
            model_config: Model config. Reads ``custom_preprocessor``,
                ``use_lstm``, ``vf_share_layers``, ``conv_activation``,
                ``conv_filters`` and the ``custom_options`` keys
                ``observation_mode``, ``mask_invalid_actions``,
                ``lstm_filters`` and ``final_pi_filter_amt``.
            name: Forwarded to ``TFModelV2.__init__``.
            q_hiddens: Must be falsy; anything else is not implemented.
            dueling: Must be falsy; anything else is not implemented.
            num_atoms: Must be 1; anything else is not implemented.
            use_noisy: Must be falsy; anything else is not implemented.
            v_min: Accepted but not used in this constructor.
            v_max: Accepted but not used in this constructor.
            sigma0: Accepted but not used in this constructor.
            parameter_noise: When true, a ``LayerNormalization`` layer is
                added after every conv layer.

        Raises:
            NotImplementedError: For unsupported ``q_hiddens`` / ``dueling``
                / ``num_atoms`` / ``use_noisy`` values, and on the LSTM path
                (which is explicitly marked as unverified).
            ValueError: If ``observation_mode`` is ``BOTH_OBSERVATIONS``.
            AssertionError: If ``observation_mode`` is not a known mode.
        """
        if q_hiddens or dueling or num_atoms != 1 or use_noisy:
            raise NotImplementedError

        model_config = with_base_config(
            base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
            extra_config=model_config)
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)

        print(model_config)

        observation_mode = model_config['custom_options']['observation_mode']
        if observation_mode == PARTIALLY_OBSERVABLE:
            self.vf_obs_key = 'partial_observation'
        elif observation_mode == FULLY_OBSERVABLE:
            self.vf_obs_key = 'full_observation'
        elif observation_mode == BOTH_OBSERVATIONS:
            raise ValueError(
                f"Using {BOTH_OBSERVATIONS} format doesn't make sense for a Q-network, there's no policy, just a Q-function"
            )
        else:
            # Raised explicitly instead of ``assert False`` so the check is
            # not stripped under ``python -O`` (which would leave
            # ``self.vf_obs_key`` unset).
            raise AssertionError(
                "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"
            )

        if model_config["custom_preprocessor"]:
            print(obs_space)

            self.preprocessor = ModelCatalog.get_preprocessor_for_space(
                observation_space=self.obs_space.original_space,
                options=model_config)
        else:
            self.preprocessor = None
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            logger.warning(
                "No custom preprocessor for StrategoModel was specified.\n"
                "Some tree search policies may not initialize their placeholders correctly without this."
            )

        self.use_lstm = model_config['use_lstm']
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.mask_invalid_actions = model_config['custom_options'][
            'mask_invalid_actions']

        conv_activation = get_activation_fn(
            model_config.get("conv_activation"))
        lstm_filters = model_config["custom_options"]['lstm_filters']
        cnn_filters = model_config.get("conv_filters")
        final_pi_filter_amt = model_config["custom_options"][
            "final_pi_filter_amt"]

        rows = obs_space.original_space[self.vf_obs_key].shape[0]
        colums = obs_space.original_space[self.vf_obs_key].shape[1]

        if self.use_lstm:
            self._lstm_state_shape = (rows, colums, lstm_filters[0][0])

            state_in = [
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="vf_lstm_h"),
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="vf_lstm_c")
            ]

            seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")

            # Extra leading variable-length axis, consumed by the
            # TimeDistributed / ConvLSTM2D layers below.
            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self.vf_obs_key].shape),
                name="vf_observation")
        else:
            state_in, seq_lens_in = None, None

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.vf_obs_key].shape,
                name="vf_observation")

        def maybe_td(layer):
            """Wrap ``layer`` in ``TimeDistributed`` when ``use_lstm`` is set."""
            if self.use_lstm:
                return tf.keras.layers.TimeDistributed(layer=layer,
                                                       name=f"td_{layer.name}")
            return layer

        def build_primary_layers(prefix: str, obs_in: tf.Tensor,
                                 state_in: tf.Tensor):
            """Stack the conv layers (and the unverified ConvLSTM2D path).

            Returns ``(features, state_out)``; without LSTM ``state_out`` is
            ``state_in`` unchanged. With LSTM the helper always ends in
            ``NotImplementedError`` because that path has not been checked.
            """
            _last_layer = obs_in

            for i, (out_size, kernel, stride) in enumerate(cnn_filters):
                _last_layer = maybe_td(
                    tf.keras.layers.Conv2D(filters=out_size,
                                           kernel_size=kernel,
                                           strides=stride,
                                           activation=conv_activation,
                                           padding="same",
                                           name="{}_conv_{}".format(
                                               prefix, i)))(_last_layer)

                if parameter_noise:
                    # assuming inputs shape (batch_size x w x h x channel)
                    _last_layer = maybe_td(
                        tf.keras.layers.LayerNormalization(
                            axis=(1, 2),
                            name=f"{prefix}_LayerNorm_{i}"))(_last_layer)

            state_out = state_in
            if self.use_lstm:
                for i, (out_size, kernel, stride) in enumerate(lstm_filters):
                    if i > 0:
                        raise NotImplementedError(
                            "Only single lstm layers are implemented right now"
                        )

                    _last_layer, *state_out = tf.keras.layers.ConvLSTM2D(
                        filters=out_size,
                        kernel_size=kernel,
                        strides=stride,
                        activation=conv_activation,
                        padding="same",
                        return_sequences=True,
                        return_state=True,
                        name="{}_convlstm".format(prefix))(
                            inputs=_last_layer,
                            mask=tf.sequence_mask(seq_lens_in),
                            initial_state=state_in)
                    raise NotImplementedError(
                        "havent checked lstms for q model")
            return _last_layer, state_out

        # ``state_in`` holds exactly the [h, c] pair created above (or None
        # without LSTM). The previous ``state_in[2:]`` slice was always an
        # empty list, so the LSTM never received its initial state.
        vf_state_in = state_in

        vf_last_layer, vf_state_out = build_primary_layers(
            prefix="vf", obs_in=self.vf_obs_inputs, state_in=vf_state_in)

        state_out = vf_state_out if self.use_lstm else None

        vf_last_layer = maybe_td(
            tf.keras.layers.Conv2D(filters=final_pi_filter_amt,
                                   kernel_size=[3, 3],
                                   strides=1,
                                   activation=conv_activation,
                                   padding="same",
                                   name="{}_conv_{}".format(
                                       'vf', "last")))(vf_last_layer)

        if parameter_noise:
            # assuming inputs shape (batch_size x w x h x channel)
            vf_last_layer = maybe_td(
                tf.keras.layers.LayerNormalization(
                    axis=(1, 2), name="vf_LayerNorm_last"))(vf_last_layer)

        # Output channels: the flat action count divided by the number of
        # board cells. Integer floor division avoids the float round trip of
        # ``int(n / cells)``.
        out_filters = int(action_space.n // (rows * colums))

        print(
            f"action space n: {action_space.n}, rows: {rows}, columns: {colums}, filters: {out_filters}"
        )

        unmasked_logits_out = maybe_td(
            tf.keras.layers.Conv2D(
                filters=out_filters,
                kernel_size=[3, 3],
                strides=1,
                activation=None,
                padding="same",
                name="{}_conv_{}".format('vf',
                                         "unmasked_logits")))(vf_last_layer)

        model_inputs = [self.vf_obs_inputs]
        model_outputs = [unmasked_logits_out]

        if self.use_lstm:
            model_inputs += [seq_lens_in, *state_in]
            model_outputs += state_out

        self.base_model = tf.keras.Model(inputs=model_inputs,
                                         outputs=model_outputs)

        # ``Model.summary()`` prints the table itself and returns ``None``;
        # wrapping it in ``print`` only emitted an extra "None" line.
        self.base_model.summary()

        self.register_variables(self.base_model.variables)