def _build_layers_v2(self, input_dict, num_outputs, options):
    """Build a relu FC policy net whose logits are masked by the action mask."""
    mask = input_dict["obs"]["action_mask"]
    last_layer = input_dict["obs"]["real_obs"]
    for idx, width in enumerate(options["fcnet_hiddens"]):
        last_layer = tf.layers.dense(
            last_layer,
            width,
            kernel_initializer=normc_initializer(1.0),
            activation=tf.nn.relu,
            name="fc{}".format(idx))
    action_logits = tf.layers.dense(
        last_layer,
        num_outputs,
        kernel_initializer=normc_initializer(0.01),
        activation=None,
        name="fc_out")
    # Single scalar output: no action masking applies.
    if num_outputs == 1:
        return action_logits, last_layer
    # Mask out invalid actions (use tf.float32.min for stability)
    inf_mask = tf.maximum(tf.log(mask), tf.float32.min)
    return action_logits + inf_mask, last_layer
def create_inverse_model(self, model_config, encoder):
    """
    Create the inverse submodel of the SCM.
    Inputs: [Encoded state at t, Encoded state at t - 1,
             Actions at t - 1, MOA LSTM output at t - 1]
    Output: Predicted social influence reward at t - 1

    NOTE(review): the hidden layer is named "fc_forward" even though this is
    the inverse model -- presumably copied from the forward-model builder;
    confirm before renaming, since layer names end up in checkpoints.

    :param model_config: The model config dict.
    :param encoder: The SCM encoder submodel.
    :return: A new inverse model.
    """
    encoder_output_size = encoder.output_shape[-1]
    # Four input heads: encodings for two timesteps, one-hot actions of all
    # agents (self + others), and the MOA LSTM output.
    inputs = [
        self.create_encoded_input_layer(encoder_output_size, "encoded_input_now"),
        self.create_encoded_input_layer(encoder_output_size, "encoded_input_next"),
        self.create_action_input_layer(self.action_space.n, self.num_other_agents + 1),
        self.create_lstm_input_layer(model_config),
    ]
    inputs_concatenated = tf.keras.layers.concatenate(inputs)
    activation = get_activation_fn(model_config.get("fcnet_activation"))
    fc_layer = tf.keras.layers.Dense(
        32,
        name="fc_forward",
        activation=activation,
        kernel_initializer=normc_initializer(1.0),
    )(inputs_concatenated)
    # relu output clamps the predicted value to be non-negative.
    output_layer = tf.keras.layers.Dense(
        1,
        activation="relu",
        kernel_initializer=normc_initializer(1.0),
    )(fc_layer)
    return tf.keras.Model(inputs, output_layer, name="SCM_Inverse_Model")
def _build_layers_v2(self, parameters, outs, args):
    """Tanh FC net over "real_obs"; invalid actions masked out of the logits."""
    obs_real_obs = parameters["obs"]["real_obs"]
    obs_action_mask = parameters["obs"]["action_mask"]
    for layer_idx, layer_size in enumerate(args["fcnet_hiddens"]):
        obs_real_obs = slim.fully_connected(
            obs_real_obs,
            layer_size,
            weights_initializer=normc_initializer(1.0),
            activation_fn=tf.nn.tanh,
            scope="fc{}".format(layer_idx))
    action_logits = slim.fully_connected(
        obs_real_obs,
        outs,
        weights_initializer=normc_initializer(0.01),
        activation_fn=None,
        scope="fc_out")
    # Scalar output (e.g. value branch): return unmasked.
    if outs == 1:
        return action_logits, obs_real_obs
    # log(0) = -inf for masked-out actions; clamp at float32 min for stability.
    mask = tf.maximum(tf.log(obs_action_mask), tf.float32.min)
    return action_logits + mask, obs_real_obs
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Tanh FC policy over "obs" with invalid-action masking via "action_mask"."""
    action_mask = input_dict["obs"]["action_mask"]
    # The mask width must match the number of logits produced.
    if num_outputs != action_mask.shape[1].value:
        raise ValueError(
            "This model assumes num outputs is equal to max avail actions",
            num_outputs,
            action_mask,
        )
    # Standard fully connected network
    last_layer = input_dict["obs"]["obs"]
    for idx, width in enumerate(options.get("fcnet_hiddens")):
        last_layer = tf.layers.dense(
            last_layer,
            width,
            kernel_initializer=normc_initializer(1.0),
            activation=tf.nn.tanh,
            name="fc{}".format(idx),
        )
    action_logits = tf.layers.dense(
        last_layer,
        num_outputs,
        kernel_initializer=normc_initializer(0.01),
        activation=None,
        name="fc_out",
    )
    # Mask out invalid actions (use tf.float32.min for stability)
    inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
    return action_logits + inf_mask, last_layer
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Small conv stem, flatten, two 32-unit relu FC layers, linear head."""
    net = input_dict["obs"]
    with tf.name_scope("custom_net"):
        net = slim.conv2d(
            net, 6, [3, 3], 1, activation_fn=tf.nn.relu, scope="conv")
        last_layer = flatten(net)
        for idx, width in enumerate([32, 32], start=1):
            last_layer = slim.fully_connected(
                last_layer,
                width,
                weights_initializer=normc_initializer(1.0),
                activation_fn=tf.nn.relu,
                scope="fc{}".format(idx))
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        return output, last_layer
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Keras model with a BatchNormalization layer after each hidden Dense.

    `is_training` is fed as a batch_size=1 bool input tensor so the
    batch-norm layers can switch between train and inference behavior;
    `is_training[0]` unpacks the scalar flag.
    """
    super().__init__(obs_space, action_space, num_outputs, model_config, name)
    inputs = tf.keras.layers.Input(shape=obs_space.shape, name="inputs")
    is_training = tf.keras.layers.Input(shape=(), dtype=tf.bool, batch_size=1,
                                        name="is_training")
    last_layer = inputs
    hiddens = [256, 256]
    for i, size in enumerate(hiddens):
        label = "fc{}".format(i)
        last_layer = tf.keras.layers.Dense(
            units=size,
            kernel_initializer=normc_initializer(1.0),
            activation=tf.nn.tanh,
            name=label)(last_layer)
        # Add a batch norm layer
        last_layer = tf.keras.layers.BatchNormalization()(
            last_layer, training=is_training[0])
    # Policy logits head.
    output = tf.keras.layers.Dense(
        units=self.num_outputs,
        kernel_initializer=normc_initializer(0.01),
        activation=None,
        name="fc_out")(last_layer)
    # Value-function head (shares the hidden tower).
    value_out = tf.keras.layers.Dense(
        units=1,
        kernel_initializer=normc_initializer(0.01),
        activation=None,
        name="value_out")(last_layer)
    self.base_model = tf.keras.models.Model(inputs=[inputs, is_training],
                                            outputs=[output, value_out])
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             **kw):
    """DQN core model: dropout -> dense(relu) -> dropout -> dense(relu).

    NOTE(review): the `num_outputs` argument is overwritten with the
    hard-coded value 5 before it reaches the superclass -- confirm this
    matches the intended action-space size at the call site.
    """
    dropout_rate = 0.2
    num_outputs = 5
    hidden_dim = 10
    tf = try_import_tf()
    super(DQNModel, self).__init__(obs_space, action_space, num_outputs,
                                   model_config, name, **kw)
    # Define the core model layers which will be used by the other
    # output heads of DistributionalQModel
    self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                        name="observations")
    layer_0 = tf.keras.layers.Dropout(rate=dropout_rate,
                                      name="my_layer0")(self.inputs)
    layer_1 = tf.keras.layers.Dense(
        hidden_dim,
        name="my_layer1",
        activation=tf.nn.relu,
        kernel_initializer=normc_initializer(1.0))(layer_0)
    layer_2 = tf.keras.layers.Dropout(rate=dropout_rate,
                                      name="my_layer2")(layer_1)
    # NOTE(review): relu on the output head clamps values to be
    # non-negative -- confirm this is intended for the Q/logits head.
    layer_out = tf.keras.layers.Dense(
        num_outputs,
        name="my_out",
        activation=tf.nn.relu,
        kernel_initializer=normc_initializer(1.0))(layer_2)
    self.base_model = tf.keras.Model(inputs=self.inputs, outputs=layer_out)
    self.register_variables(self.base_model.variables)
def __init__(
    self,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    num_outputs: int,
    model_config: ModelConfigDict,
    name: str,
):
    """Two-input net: a shared Encoder feeds the logits and value heads.

    The observation is split upstream into an 11x11x9 board tensor (conv
    branch) and the remaining flat features (dense branch).
    """
    super(ConvFCNet, self).__init__(obs_space, action_space, num_outputs,
                                    model_config, name)
    board_in = tf.keras.layers.Input(shape=(11, 11, 9), )
    flat_in = tf.keras.layers.Input(shape=(1260 - 11 * 11 * 9, ), )
    feats = Encoder(128)((board_in, flat_in))
    logits_out = tf.keras.layers.Dense(
        num_outputs,
        name="fc_out",
        kernel_initializer=normc_initializer(0.01),
    )(feats)
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        kernel_initializer=normc_initializer(0.01),
    )(feats)
    self.base_model = tf.keras.Model(
        inputs=[board_in, flat_in],
        outputs=[logits_out, value_out],
    )
    print(self.base_model.summary())
    self.register_variables(self.base_model.variables)
    self._value_out = None
def _build_value_model(self, model_config: ModelConfigDict):
    """Build value model with given model configuration

    model_config = {'activation': str, 'hiddens': Sequence}
    """
    act_fn = get_activation_fn(model_config.get("activation"))
    inputs = tf.keras.layers.Input(
        shape=(np.product(self.critic_preprocessor.shape),),
        name="value-inputs")
    net = inputs
    # Hidden tower, sized by the "hiddens" config entry.
    for idx, width in enumerate(model_config.get("hiddens", [])):
        net = tf.keras.layers.Dense(
            width,
            name="fc_{}".format(idx),
            activation=act_fn,
            kernel_initializer=normc_initializer(1.0),
        )(net)
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01),
    )(net)
    return tf.keras.Model(inputs, [value_out])
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """One-hidden-layer model; a value head is added only if vf_share_layers."""
    super(MyKerasModel, self).__init__(obs_space, action_space, num_outputs,
                                       model_config, name)
    self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                        name="observations")
    hidden = tf.keras.layers.Dense(
        16,
        name="layer1",
        activation=tf.nn.relu,
        kernel_initializer=normc_initializer(1.0),
    )(self.inputs)
    layer_out = tf.keras.layers.Dense(
        num_outputs,
        name="out",
        activation=None,
        kernel_initializer=normc_initializer(0.01),
    )(hidden)
    if not self.model_config["vf_share_layers"]:
        # No shared value branch: the model emits policy logits only.
        self.base_model = tf.keras.Model(self.inputs, layer_out)
    else:
        value_out = tf.keras.layers.Dense(
            1,
            name="value",
            activation=None,
            kernel_initializer=normc_initializer(0.01),
        )(hidden)
        self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Two 256-unit relu layers feeding linear logits and value heads."""
    super(TestKerasModel, self).__init__(obs_space, action_space, num_outputs,
                                         model_config, name)
    self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                        name="observations")
    net = self.inputs
    for label in ("my_layer1", "my_layer2"):
        net = tf.keras.layers.Dense(
            256,
            name=label,
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(net)
    layer_out = tf.keras.layers.Dense(
        num_outputs,
        name="my_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(net)
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(net)
    self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
def __init__(self, obs_space, action_space, num_outputs, model_config, name="my_model"): super(MLPModelV2, self).__init__(obs_space, action_space, num_outputs, model_config, name) # Simplified to one layer. input_layer = tf.keras.layers.Input(obs_space.shape, dtype=obs_space.dtype) layer_1 = tf.keras.layers.Dense( 400, activation="relu", kernel_initializer=normc_initializer(1.0))(input_layer) layer_2 = tf.keras.layers.Dense( 300, activation="relu", kernel_initializer=normc_initializer(1.0))(layer_1) output = tf.keras.layers.Dense( num_outputs, activation=None, kernel_initializer=normc_initializer(0.01))(layer_2) value_out = tf.keras.layers.Dense( 1, activation=None, name="value_out", kernel_initializer=normc_initializer(0.01))(layer_2) self.base_model = tf.keras.Model(input_layer, [output, value_out]) self.register_variables(self.base_model.variables)
def forward(self, input_dict, state, seq_lens):
    """Forward pass: 2x (dense tanh + batch norm) -> logits and value heads.

    Layers are built with tf1.layers under an AUTO_REUSE variable scope so
    repeated forward() calls share variables. Variables are registered
    manually on the first call (see NOTE below).
    """
    last_layer = input_dict["obs"]
    hiddens = [256, 256]
    with tf1.variable_scope("model", reuse=tf1.AUTO_REUSE):
        # SampleBatch exposes is_training as an attribute; plain input
        # dicts carry it under the "is_training" key.
        if isinstance(input_dict, SampleBatch):
            is_training = input_dict.is_training
        else:
            is_training = input_dict["is_training"]
        for i, size in enumerate(hiddens):
            last_layer = tf1.layers.dense(
                last_layer,
                size,
                kernel_initializer=normc_initializer(1.0),
                activation=tf.nn.tanh,
                name="fc{}".format(i),
            )
            # Add a batch norm layer
            last_layer = tf1.layers.batch_normalization(
                last_layer, training=is_training, name="bn_{}".format(i))
        output = tf1.layers.dense(
            last_layer,
            self.num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None,
            name="out",
        )
        self._value_out = tf1.layers.dense(
            last_layer,
            1,
            kernel_initializer=normc_initializer(1.0),
            activation=None,
            name="vf",
        )
    # Register variables.
    # NOTE: This is not the recommended way of doing things. We would
    # prefer creating keras-style Layers like it's done in the
    # `KerasBatchNormModel` class above and then have TFModelV2 auto-detect
    # the created vars. However, since there is a bug
    # in keras/tf that prevents us from using that KerasBatchNormModel
    # example (see comments above), we do variable registration the old,
    # manual way for this example Model here.
    if not self._registered:
        # Register already auto-detected variables (from the wrapping
        # Model, e.g. DQNTFModel).
        self.register_variables(self.variables())
        # Then register everything we added to the graph in this `forward`
        # call.
        self.register_variables(
            tf1.get_collection(tf1.GraphKeys.TRAINABLE_VARIABLES,
                               scope=".+/model/.+"))
        self._registered = True
    return output, []
def __init__(
    self,
    obs_space,
    action_space,
    num_outputs,
    model_config,
    name,
    hiddens_size=128,
    cell_size=128,
):
    """Dense-Dense-LSTM recurrent model with logits and value heads."""
    super(RNNModel, self).__init__(obs_space, action_space, num_outputs,
                                   model_config, name)
    self.cell_size = cell_size
    # Batch-major inputs: (batch, time, obs_dim).
    obs_in = tf.keras.layers.Input(shape=(None, obs_space.shape[0]),
                                   name="inputs")
    h_in = tf.keras.layers.Input(shape=(cell_size, ), name="h")
    c_in = tf.keras.layers.Input(shape=(cell_size, ), name="c")
    seq_lens_in = tf.keras.layers.Input(shape=(), name="seq_in",
                                        dtype=tf.int32)
    hidden = DenseLayer(hiddens_size)(obs_in)
    hidden = DenseLayer(hiddens_size)(hidden)
    lstm_out, state_h, state_c = tf.keras.layers.LSTM(
        cell_size,
        return_sequences=True,
        return_state=True,
        name="lstm",
    )(
        inputs=hidden,
        mask=tf.sequence_mask(seq_lens_in),
        initial_state=[h_in, c_in],
    )
    lstm_out = tf.keras.layers.LayerNormalization()(lstm_out)
    logits = tf.keras.layers.Dense(
        self.num_outputs,
        name="logits",
        kernel_initializer=normc_initializer(0.01),
    )(lstm_out)
    values = tf.keras.layers.Dense(
        1,
        activation=None,
        name="values",
        kernel_initializer=normc_initializer(0.01),
    )(lstm_out)
    # Create the RNN model
    self.rnn_model = tf.keras.Model(
        inputs=[obs_in, seq_lens_in, h_in, c_in],
        outputs=[logits, values, state_h, state_c],
    )
    self.register_variables(self.rnn_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             **kwargs):
    """Optional conv stack over the 'board' obs, then an FC tower and heads."""
    super().__init__(obs_space, action_space, num_outputs, model_config, name,
                     **kwargs)
    conv_filters = model_config['conv_filters']
    self.is_conv = bool(conv_filters)
    board_space = obs_space.original_space['board']
    # Conv mode keeps the 2D board plus a channel dim; FC mode flattens it.
    if self.is_conv:
        new_shape = board_space.shape + (1, )
    else:
        new_shape = (np.prod(board_space.shape), )
    self.inputs = tf.keras.layers.Input(shape=new_shape, name='observations')
    net = self.inputs
    if self.is_conv:
        conv_activation = get_activation_fn(model_config['conv_activation'])
        for i, (filters, kernel_size, stride) in enumerate(conv_filters, 1):
            net = tf.keras.layers.Conv2D(
                filters,
                kernel_size,
                stride,
                name="conv{}".format(i),
                activation=conv_activation,
                padding='same')(net)
        net = tf.keras.layers.Flatten()(net)
    fc_activation = get_activation_fn(model_config['fcnet_activation'])
    for i, size in enumerate(model_config['fcnet_hiddens'], 1):
        net = tf.keras.layers.Dense(
            size,
            name='fc{}'.format(i),
            activation=fc_activation,
            kernel_initializer=normc_initializer(1.0))(net)
    layer_out = tf.keras.layers.Dense(
        num_outputs,
        name="my_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(net)
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(net)
    self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
    self.register_variables(self.base_model.variables)
    self._value_out = None
def _build_layers(self, inputs, num_outputs, options): """Process the flattened inputs. Note that dict inputs will be flattened into a vector. To define a model that processes the components separately, use _build_layers_v2(). """ # Soft deprecate this class. All Models should use the ModelV2 # API from here on. deprecation_warning("Model->FullyConnectedNetwork", "ModelV2->FullyConnectedNetwork", error=False) hiddens = options.get("fcnet_hiddens") activation = get_activation_fn(options.get("fcnet_activation")) if len(inputs.shape) > 2: inputs = tf.layers.flatten(inputs) with tf.name_scope("fc_net"): i = 1 last_layer = inputs for size in hiddens: # skip final linear layer if options.get("no_final_linear") and i == len(hiddens): output = tf.layers.dense( last_layer, num_outputs, kernel_initializer=normc_initializer(1.0), activation=activation, name="fc_out") return output, output label = "fc{}".format(i) last_layer = tf.layers.dense( last_layer, size, kernel_initializer=normc_initializer(1.0), activation=activation, name=label) i += 1 output = tf.layers.dense( last_layer, num_outputs, kernel_initializer=normc_initializer(0.01), activation=None, name="fc_out") return output, last_layer
def __init__(self, dim: int, **kwargs):
    """Layer pairing a Dense(dim) with a LayerNormalization.

    :param dim: Output width of the dense sub-layer.
    """
    super().__init__(**kwargs)
    self.norm = tf.keras.layers.LayerNormalization()
    # normc init matches the convention used elsewhere in this file.
    self.dense = tf.keras.layers.Dense(
        dim, kernel_initializer=normc_initializer(1.0))
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Multi-input model: one CNN per image component of a Tuple obs space.

    Rank-3 (image) components each get their own CNN; Discrete components
    are one-hot encoded; 1D Box components pass through. Branch outputs are
    concatenated and fed to logits/value heads when num_outputs is given,
    otherwise the concatenated features are the model's output.
    """
    # TODO: (sven) Support Dicts as well.
    assert isinstance(obs_space.original_space, (Tuple)), \
        "`obs_space.original_space` must be Tuple!"
    super().__init__(obs_space, action_space, num_outputs, model_config,
                     name)
    # Build the CNN(s) given obs_space's image components.
    self.cnns = {}
    concat_size = 0
    for i, component in enumerate(obs_space.original_space):
        # Image space.
        if len(component.shape) == 3:
            config = {
                "conv_filters": model_config.get(
                    "conv_filters", get_filter_config(component.shape)),
                "conv_activation": model_config.get("conv_activation"),
            }
            # num_outputs=None -> the CNN produces flat features, not logits.
            cnn = ModelCatalog.get_model_v2(component,
                                            action_space,
                                            num_outputs=None,
                                            model_config=config,
                                            framework="tf",
                                            name="cnn_{}".format(i))
            concat_size += cnn.num_outputs
            self.cnns[i] = cnn
        # Discrete inputs -> One-hot encode.
        elif isinstance(component, Discrete):
            concat_size += component.n
        # TODO: (sven) Multidiscrete (see e.g. our auto-LSTM wrappers).
        # Everything else (1D Box).
        else:
            assert len(component.shape) == 1, \
                "Only input Box 1D or 3D spaces allowed!"
            concat_size += component.shape[-1]
    self.logits_and_value_model = None
    self._value_out = None
    if num_outputs:
        # Action-distribution head.
        concat_layer = tf.keras.layers.Input((concat_size, ))
        logits_layer = tf.keras.layers.Dense(
            num_outputs,
            activation=tf.keras.activations.linear,
            name="logits")(concat_layer)
        # Create the value branch model.
        value_layer = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(concat_layer)
        self.logits_and_value_model = tf.keras.models.Model(
            concat_layer, [logits_layer, value_layer])
    else:
        # No logits head: expose the concatenated feature size instead.
        self.num_outputs = concat_size
def value_function(self):
    """Return the value-function output tensor, shape [BATCH_SIZE].

    Requires forward() to have been called first (sets cur_instance).
    With vf_share_layers the value is a linear layer on the shared feature
    layer; otherwise a fully separate, non-recurrent branch model is built.
    """
    assert self.cur_instance, "must call forward first"
    with self._branch_variable_scope("value_function"):
        # Simple case: sharing the feature layer
        if self.model_config["vf_share_layers"]:
            return tf.reshape(
                linear(self.cur_instance.last_layer, 1, "value_function",
                       normc_initializer(1.0)), [-1])
        # Create a new separate model with no RNN state, etc.
        branch_model_config = self.model_config.copy()
        branch_model_config["free_log_std"] = False
        if branch_model_config["use_lstm"]:
            # The value branch is built without recurrence even when the
            # policy uses an LSTM, hence the warning below.
            branch_model_config["use_lstm"] = False
            logger.warning(
                "It is not recommended to use a LSTM model with "
                "vf_share_layers=False (consider setting it to True). "
                "If you want to not share layers, you can implement "
                "a custom LSTM model that overrides the "
                "value_function() method.")
        branch_instance = self.legacy_model_cls(
            self.cur_instance.input_dict,
            self.obs_space,
            self.action_space,
            1,
            branch_model_config,
            state_in=None,
            seq_lens=None)
        return tf.reshape(branch_instance.outputs, [-1])
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Legacy LSTM model: optionally concat prev action/reward into the
    features, run an LSTMCell over the time-folded batch, emit linear logits.

    Returns (logits, last_layer) where last_layer is the LSTM output
    reshaped back to [batch * time, cell_size].
    """
    # Hard deprecate this class. All Models should use the ModelV2
    # API from here on.
    deprecation_warning("Model->LSTM", "RecurrentNetwork", error=False)
    cell_size = options.get("lstm_cell_size")
    if options.get("lstm_use_prev_action_reward"):
        action_dim = int(
            np.product(
                input_dict["prev_actions"].get_shape().as_list()[1:]))
        # Feature vector = [obs, flattened prev action, prev reward].
        features = tf.concat(
            [
                input_dict["obs"],
                tf.reshape(
                    tf.cast(input_dict["prev_actions"], tf.float32),
                    [-1, action_dim]),
                tf.reshape(input_dict["prev_rewards"], [-1, 1]),
            ],
            axis=1)
    else:
        features = input_dict["obs"]
    # Fold the flat batch into [batch, time, feature] for dynamic_rnn.
    last_layer = add_time_dimension(features, self.seq_lens)
    # Setup the LSTM cell
    lstm = tf1.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True)
    self.state_init = [
        np.zeros(lstm.state_size.c, np.float32),
        np.zeros(lstm.state_size.h, np.float32)
    ]
    # Setup LSTM inputs
    if self.state_in:
        c_in, h_in = self.state_in
    else:
        c_in = tf1.placeholder(
            tf.float32, [None, lstm.state_size.c], name="c")
        h_in = tf1.placeholder(
            tf.float32, [None, lstm.state_size.h], name="h")
        self.state_in = [c_in, h_in]
    # Setup LSTM outputs
    state_in = tf1.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
    lstm_out, lstm_state = tf1.nn.dynamic_rnn(
        lstm,
        last_layer,
        initial_state=state_in,
        sequence_length=self.seq_lens,
        time_major=False,
        dtype=tf.float32)
    self.state_out = list(lstm_state)
    # Compute outputs: flatten back to [batch * time, cell_size].
    last_layer = tf.reshape(lstm_out, [-1, cell_size])
    logits = linear(last_layer, num_outputs, "action",
                    normc_initializer(0.01))
    return logits, last_layer
def __init__(self, obs_space, action_space, num_outputs, model_config,
             name="atari_model"):
    """Conv net over 84x84x4 frames, concatenated with an agent indicator."""
    super(AtariModel, self).__init__(obs_space, action_space, num_outputs,
                                     model_config, name)
    frames_in = tf.keras.layers.Input(shape=(84, 84, 4), name='observations')
    indicator_in = tf.keras.layers.Input(shape=(2,), name="agent_indicator")
    # Convolutions on the frames on the screen
    net = frames_in
    for filters, kernel, stride in ((32, [8, 8], (4, 4)),
                                    (64, [4, 4], (2, 2)),
                                    (64, [3, 3], (1, 1))):
        net = tf.keras.layers.Conv2D(
            filters, kernel, strides=stride, activation="relu",
            data_format='channels_last')(net)
    net = tf.keras.layers.Flatten()(net)
    # Append the per-agent indicator to the flattened conv features.
    net = tf.keras.layers.Concatenate()([net, indicator_in])
    net = tf.keras.layers.Dense(
        512,
        activation="relu",
        kernel_initializer=normc_initializer(1.0))(net)
    action = tf.keras.layers.Dense(
        num_outputs,
        activation="linear",
        name="actions",
        kernel_initializer=normc_initializer(0.01))(net)
    value_out = tf.keras.layers.Dense(
        1,
        activation=None,
        name="value_out",
        kernel_initializer=normc_initializer(0.01))(net)
    self.base_model = tf.keras.Model([frames_in, indicator_in],
                                     [action, value_out])
    self.register_variables(self.base_model.variables)
def _build_layers(self, inputs, num_outputs, options):
    """Process the flattened inputs.

    Note that dict inputs will be flattened into a vector. To define a
    model that processes the components separately, use _build_layers_v2().
    """
    hiddens = options.get("fcnet_hiddens")
    activation = get_activation_fn(options.get("fcnet_activation"))
    if len(inputs.shape) > 2:
        inputs = tf.layers.flatten(inputs)
    with tf.name_scope("fc_net"):
        last_layer = inputs
        for idx, size in enumerate(hiddens, start=1):
            if options.get("no_final_linear") and idx == len(hiddens):
                # skip final linear layer: the last hidden layer doubles as
                # the (activated) output of width num_outputs.
                output = tf.layers.dense(
                    last_layer,
                    num_outputs,
                    kernel_initializer=normc_initializer(1.0),
                    activation=activation,
                    name="fc_out")
                return output, output
            last_layer = tf.layers.dense(
                last_layer,
                size,
                kernel_initializer=normc_initializer(1.0),
                activation=activation,
                name="fc{}".format(idx))
        output = tf.layers.dense(
            last_layer,
            num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None,
            name="fc_out")
        return output, last_layer
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Purely linear model: one Dense head each for logits and value."""
    print(obs_space)
    super(Linear, self).__init__(obs_space, action_space, num_outputs,
                                 model_config, name)
    self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                        name="observations")
    heads = [
        tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(self.inputs),
        tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(self.inputs),
    ]
    self.base_model = tf.keras.Model(self.inputs, heads)
    self.register_variables(self.base_model.variables)
def build_primary_layers(prefix: str, obs_in: tf.Tensor, state_in: tf.Tensor):
    """Build the FC tower for one head (policy or value).

    Encapsulated in a function to either be called once for a shared
    policy/vf tower or twice for separate policy/vf towers.

    :param prefix: Name prefix for the created layers.
    :param obs_in: The (possibly time-distributed) observation tensor.
    :param state_in: RNN state input; passed through unchanged while the
        conv/LSTM path below is disabled.
    :return: Tuple of (last layer output, state output).
    """
    _last_layer = obs_in
    # for i, (out_size, kernel, stride) in enumerate(cnn_filters):
    #     _last_layer = maybe_td(tf.keras.layers.Conv2D(
    #         filters=out_size,
    #         kernel_size=kernel,
    #         strides=stride,
    #         activation=conv_activation,
    #         padding="same",
    #         name="{}_conv_{}".format(prefix, i)))(_last_layer)
    #
    # if self.use_lstm and not self.fake_lstm:
    #     for i, (out_size, kernel, stride) in enumerate(lstm_filters):
    #         if i > 0:
    #             raise NotImplementedError(
    #                 "Only single lstm layers are implemented right now")
    #         _last_layer, *state_out = tf.keras.layers.ConvLSTM2D(
    #             filters=out_size,
    #             kernel_size=kernel,
    #             strides=stride,
    #             activation=conv_activation,
    #             padding="same",
    #             return_sequences=True,
    #             return_state=True,
    #             name="{}_convlstm".format(prefix))(
    #                 inputs=_last_layer,
    #                 mask=tf.sequence_mask(seq_lens_in),
    #                 initial_state=state_in)
    # BUG FIX: `state_out` was only assigned inside the commented-out LSTM
    # branches above, so returning it raised NameError. With no recurrent
    # layers active, the state passes through unchanged.
    state_out = state_in
    for i, size in enumerate(model_config['fcnet_hiddens']):
        _last_layer = maybe_td(
            tf.keras.layers.Dense(
                size,
                name="{}_fc_{}".format(prefix, i),
                activation=conv_activation,
                kernel_initializer=normc_initializer(1.0)))(_last_layer)
    return _last_layer, state_out
def __init__(
    self,
    obs_space,
    action_space,
    num_outputs,
    model_config,
    name,
    cell_size=64,
):
    """
    Create a LSTM with an actor-critic output: an output head with size
    num_outputs for the policy, and an output head of size 1 for the value
    function.

    :param obs_space: The size of the previous layer.
        NOTE(review): used directly as the per-timestep input width
        (`shape=(None, obs_space)`), so this must be an int rather than a
        gym.Space -- confirm against callers.
    :param action_space: The amount of actions available to the agent.
    :param num_outputs: The amount of actions available to the agent.
    :param model_config: The config dict for the model, unused.
    :param name: The name of the model.
    :param cell_size: The amount of LSTM units.
    """
    super(ActorCriticLSTM, self).__init__(obs_space, action_space,
                                          num_outputs, model_config, name)
    self.cell_size = cell_size
    input_layer = tf.keras.layers.Input(shape=(None, obs_space),
                                        name="inputs")
    state_in_h = tf.keras.layers.Input(shape=(cell_size, ), name="h")
    state_in_c = tf.keras.layers.Input(shape=(cell_size, ), name="c")
    seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)
    lstm_out, state_h, state_c = tf.keras.layers.LSTM(
        cell_size,
        return_sequences=True,
        return_state=True,
        name="lstm")(
            inputs=input_layer,
            mask=tf.sequence_mask(seq_in),
            initial_state=[state_in_h, state_in_c],
        )
    # Postprocess LSTM output with another hidden layer and compute values
    # NOTE(review): the logits layer is named after the whole model (`name`);
    # confirm nothing keys on this layer name before renaming it to a fixed
    # label such as "logits".
    logits = tf.keras.layers.Dense(self.num_outputs,
                                   activation=tf.keras.activations.linear,
                                   name=name)(lstm_out)
    inputs = [input_layer, seq_in, state_in_h, state_in_c]
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01),
    )(lstm_out)
    outputs = [logits, value_out, state_h, state_c]
    self.rnn_model = tf.keras.Model(inputs=inputs,
                                    outputs=outputs,
                                    name="Actor_Critic_Model")
def value_function(self):
    """Builds the value function output.

    This method can be overridden to customize the implementation of the
    value function (e.g., not sharing hidden layers).

    Returns:
        Tensor of size [BATCH_SIZE] for the value function.
    """
    value = linear(self.last_layer, 1, "value", normc_initializer(1.0))
    return tf.reshape(value, [-1])
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Two tanh FC layers, each followed by batch norm, then a linear head."""
    net = input_dict["obs"]
    for idx, width in enumerate([256, 256]):
        net = tf.layers.dense(
            net,
            width,
            kernel_initializer=normc_initializer(1.0),
            activation=tf.nn.tanh,
            name="fc{}".format(idx))
        # Add a batch norm layer
        net = tf.layers.batch_normalization(
            net, training=input_dict["is_training"])
    output = tf.layers.dense(
        net,
        num_outputs,
        kernel_initializer=normc_initializer(0.01),
        activation=None,
        name="fc_out")
    return output, net
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Three-layer tanh MLP policy plus a parallel three-layer value tower."""
    super(MyModel, self).__init__(obs_space, action_space, num_outputs,
                                  model_config, name)
    self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                        name="observations")
    activation = tf.nn.tanh
    hiddens = [256, 256, 256]

    def _tower(name_fmt):
        # Stack the hidden layers, naming each one from name_fmt.
        net = self.inputs
        for n, size in enumerate(hiddens, start=1):
            net = tf.keras.layers.Dense(
                size,
                name=name_fmt.format(n),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(net)
        return net

    layer_out = tf.keras.layers.Dense(
        num_outputs,
        name="fc_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(_tower("fc_{}"))
    # build a parallel set of hidden layers for the value net
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(_tower("fc_value_{}"))
    self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             **kw):
    """Single 128-unit relu layer feeding a relu output head."""
    super(MyKerasQModel, self).__init__(obs_space, action_space, num_outputs,
                                        model_config, name, **kw)
    # Define the core model layers which will be used by the other
    # output heads of DistributionalQModel
    self.inputs = tf.keras.layers.Input(
        shape=obs_space.shape, name="observations")
    hidden = tf.keras.layers.Dense(
        128,
        name="my_layer1",
        activation=tf.nn.relu,
        kernel_initializer=normc_initializer(1.0))(self.inputs)
    layer_out = tf.keras.layers.Dense(
        num_outputs,
        name="my_out",
        activation=tf.nn.relu,
        kernel_initializer=normc_initializer(1.0))(hidden)
    self.base_model = tf.keras.Model(self.inputs, layer_out)
    self.register_variables(self.base_model.variables)
def init(self):
    """Build the conv + dense two-headed model over board/attribute inputs.

    The 11x11x4 board goes through four conv+batch-norm stages, is reduced
    to its channel axis, concatenated with the 4-dim attribute vector, and
    fed through two 1024-unit relu layers into action and value heads.
    """
    _board = tf.keras.layers.Input(shape=[11, 11, 4], name="board")
    _attribute = tf.keras.layers.Input(shape=[4], name="attribute")
    net = _board
    # Conv stack; each stage is followed by batch normalization.
    for filters, kernel, strides, padding in (
            (32, 5, 2, "same"),
            (64, 3, 1, "valid"),
            (128, 3, 1, "valid"),
            (128, 2, 1, "valid"),
    ):
        net = tf.keras.layers.Conv2D(
            filters, kernel, strides=strides, padding=padding,
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(0.01))(net)
        net = tf.keras.layers.BatchNormalization()(net)
    # Collapse the spatial dims, keeping only the channel axis.
    net = tf.reshape(net, (-1, net.shape[-1]))
    net = tf.concat([net, _attribute], axis=1)
    net = tf.keras.layers.Dense(
        1024, activation=tf.nn.relu,
        kernel_initializer=normc_initializer(0.01))(net)
    net = tf.keras.layers.Dense(
        1024, activation=tf.nn.relu,
        kernel_initializer=normc_initializer(0.01))(net)
    # BUG FIX: both heads referenced an undefined name `kernel_initializer`
    # (NameError at build time); use the same normc initializer as every
    # other layer in this model.
    action_out = tf.keras.layers.Dense(
        self.num_outputs,
        kernel_initializer=normc_initializer(0.01))(net)
    value_out = tf.keras.layers.Dense(
        1, kernel_initializer=normc_initializer(0.01))(net)
    self.base_model = tf.keras.Model([_board, _attribute],
                                     [action_out, value_out])
    self.register_variables(self.base_model.variables)