def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
    """Build the obs, q, aux and aux-head sub-networks of the model.

    Layer sizes come from ``model_config["custom_options"]`` (keys
    ``obs_module_hiddens``, ``q_module_hiddens``, ``aux_module_hiddens`` and
    ``aux_head_hiddens``); the activation comes from
    ``model_config["fcnet_activation"]``.

    Note that ``num_outputs`` is not forwarded to the parent constructor:
    the last entry of ``q_module_hiddens`` is passed as the output size
    instead.

    Args:
        obs_space: Observation space; its ``original_space`` must provide
            the ``"board"`` and ``"hidden_hand"`` entries.
        action_space: Forwarded to the parent constructor only.
        num_outputs: Unused by this constructor (see note above).
        model_config: Model configuration dict (see keys above).
        name: Prefix for the names of the four sub-networks.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super(HanabiHandInference, self).__init__(
        obs_space,
        action_space,
        model_config["custom_options"]["q_module_hiddens"][-1],
        model_config,
        name,
        **kwargs)

    options = model_config["custom_options"]

    def fc_config(hiddens_key, no_final_linear):
        # Config dict handed to one FullyConnectedNetwork sub-module.
        return {
            "fcnet_activation": model_config["fcnet_activation"],
            "fcnet_hiddens": options[hiddens_key],
            "no_final_linear": no_final_linear,
            "vf_share_layers": True,
        }

    # Trunk that consumes the "board" component of the observation.
    self.obs_module = FullyConnectedNetwork(
        obs_space.original_space["board"],
        None,
        options["obs_module_hiddens"][-1],
        fc_config("obs_module_hiddens", True),
        name + "obs_module")

    # A zeros array sized like the trunk output stands in for an
    # observation space (presumably only its shape is read -- TODO confirm).
    trunk_output_dummy = numpy.zeros(options["obs_module_hiddens"][-1])

    self.q_module = FullyConnectedNetwork(
        trunk_output_dummy,
        None,
        options["q_module_hiddens"][-1],
        fc_config("q_module_hiddens", True),
        name + "q_module")

    self.aux_module = FullyConnectedNetwork(
        trunk_output_dummy,
        None,
        options["aux_module_hiddens"][-1],
        fc_config("aux_module_hiddens", True),
        name + "aux_module")

    # Same stand-in trick, this time sized like the aux module output.
    aux_output_dummy = numpy.zeros(options["aux_module_hiddens"][-1])

    # The head is the only sub-network that keeps its final linear layer;
    # its output size is the flattened size of the "hidden_hand" space.
    self.aux_head = FullyConnectedNetwork(
        aux_output_dummy,
        None,
        numpy.prod(obs_space.original_space["hidden_hand"].shape),
        fc_config("aux_head_hiddens", False),
        name + "aux_head")

    for submodule in (self.obs_module, self.q_module,
                      self.aux_module, self.aux_head):
        self.register_variables(submodule.variables())

    # Falls back to the "sqrt" formula when none is configured.
    self.aux_loss_formula = get_aux_loss_formula(
        options.get("aux_loss_formula", "sqrt"))
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Create the base network and a separate central value-function model.

    Reads ``max_num_agents`` and ``central_vf_size`` from
    ``model_config['custom_options']``.

    Args:
        obs_space: Observation space; ``obs_space.shape[0]`` is used as the
            per-agent observation width.
        action_space: Forwarded to the parent and the base network.
        num_outputs: Output size of the base network.
        model_config: Model configuration dict.
        name: Name given to the parent model and the base network.
    """
    super(CentralizedCriticModel, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    # Base of the model.
    self.model = FullyConnectedNetwork(
        obs_space, action_space, num_outputs, model_config, name)
    self.register_variables(self.model.variables())

    custom_options = model_config['custom_options']
    self.max_num_agents = custom_options['max_num_agents']
    self.obs_space_shape = obs_space.shape[0]

    # Central VF: one flat input of width (obs width * max_num_agents),
    # a tanh hidden layer, then a single linear output unit.
    joint_width = self.obs_space_shape * self.max_num_agents
    joint_obs_in = tf.keras.layers.Input(shape=(joint_width, ), name="opp_obs")
    hidden_layer = tf.keras.layers.Dense(
        custom_options['central_vf_size'],
        activation=tf.nn.tanh,
        name="c_vf_dense")
    hidden = hidden_layer(joint_obs_in)
    value_layer = tf.keras.layers.Dense(1, activation=None, name="c_vf_out")
    value = value_layer(hidden)

    self.central_vf = tf.keras.Model(inputs=[joint_obs_in], outputs=value)
    self.register_variables(self.central_vf.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Wrap a single FullyConnectedNetwork built from the given arguments.

    All five arguments are passed unchanged both to the parent constructor
    and to the wrapped network, whose variables are then registered on
    this model.
    """
    ctor_args = (obs_space, action_space, num_outputs, model_config, name)
    super(CustomModel, self).__init__(*ctor_args)

    self.model = FullyConnectedNetwork(*ctor_args)
    wrapped_variables = self.model.variables()
    self.register_variables(wrapped_variables)
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Build the network inside the reusable ``"shared"`` variable scope.

    Stores ``input_dict["obs"]`` on ``self.obs_in`` and the constructed
    network on ``self.fcnet``.

    Args:
        input_dict: Input dict; must contain an ``"obs"`` entry.
        num_outputs: Output size forwarded to the network.
        options: Options forwarded to the network.

    Returns:
        Tuple ``(outputs, last_layer)`` taken from the constructed network.
    """
    self.obs_in = input_dict["obs"]

    # AUTO_REUSE creates the variables on first entry and reuses them on
    # every later entry into the "shared" scope.
    shared_scope = tf.variable_scope("shared", reuse=tf.AUTO_REUSE)
    with shared_scope:
        self.fcnet = FullyConnectedNetwork(
            input_dict,
            self.obs_space,
            self.action_space,
            num_outputs,
            options)

    network = self.fcnet
    return network.outputs, network.last_layer
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Create separate action and value networks.

    The action network takes a fixed 6-dimensional ``Box`` in ``[0, 1]``
    instead of ``obs_space``; the value network takes ``obs_space`` and has
    a single output.

    Args:
        obs_space: Observation space used by the parent and value network.
        action_space: Forwarded to the parent and both networks.
        num_outputs: Output size of the action network.
        model_config: Model configuration dict shared by both networks.
        name: Base name; ``"_action"`` / ``"_vf"`` suffixes are appended.
    """
    super(CentralizedCriticModel, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    # One-hot encoded Discrete(6).
    action_input_space = Box(low=0, high=1, shape=(6, ))
    self.action_model = FullyConnectedNetwork(
        action_input_space,
        action_space,
        num_outputs,
        model_config,
        name + "_action")
    self.register_variables(self.action_model.variables())

    # Value network with a single scalar output.
    self.value_model = FullyConnectedNetwork(
        obs_space, action_space, 1, model_config, name + "_vf")
    self.register_variables(self.value_model.variables())
def __init__(self,
             obs_space,
             action_space,
             num_outputs,
             model_config,
             name,
             true_obs_shape=(4, ),
             action_embed_size=6,
             **kw):
    """Build the action-embedding network for the spy or blind observation space.

    ``model_config['custom_options']`` supplies ``spy``, ``parties``,
    ``blocks`` and ``extended``.

    Args:
        obs_space: Forwarded to the parent constructor.
        action_space: Forwarded to the parent and the embedding network.
        num_outputs: Forwarded to the parent constructor.
        model_config: Model configuration dict (see keys above).
        name: Base name; ``"_action_embed"`` is appended for the network.
        true_obs_shape: Never read by this constructor.
        action_embed_size: Always overwritten: 6 when ``extended`` is
            truthy, otherwise 4.
        **kw: Forwarded unchanged to the parent constructor.
    """
    super(ParametricActionsModel, self).__init__(
        obs_space, action_space, num_outputs, model_config, name, **kw)

    custom_options = model_config['custom_options']

    # Pick the space factory from the "spy" flag, then build the space.
    make_space = make_spy_space if custom_options['spy'] else make_blind_space
    true_obs_space = make_space(
        custom_options['parties'], custom_options['blocks'])

    action_embed_size = 6 if custom_options['extended'] else 4

    # Width of the flat input: the sum of the preprocessed sizes of every
    # component of the true observation space.
    total_dim = sum(
        get_preprocessor(component)(component).size
        for component in true_obs_space)

    self.action_embed_model = FullyConnectedNetwork(
        Box(-1, 1, shape=(total_dim, )),
        action_space,
        action_embed_size,
        model_config,
        name + "_action_embed")
    self.register_variables(self.action_embed_model.variables())
def __init__(self,
             obs_space,
             action_space,
             num_outputs,
             model_config,
             name,
             true_obs_shape=(24, ),
             action_embed_size=None):
    """Build the action-embedding network over the ``real_obs`` component.

    Args:
        obs_space: Observation space whose ``original_space`` provides a
            ``'real_obs'`` entry.
        action_space: Action space; its ``n`` attribute is used as the
            embedding size when ``action_embed_size`` is ``None``.
        num_outputs: Forwarded to the parent constructor.
        model_config: Forwarded to the parent and the embedding network.
        name: Base name; ``"_action_embed"`` is appended for the network.
        true_obs_shape: Value passed in is ignored; it is overwritten with
            the preprocessed size of ``real_obs``.
        action_embed_size: Output size of the embedding network.
    """
    super(ParametricActionsModel, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    if action_embed_size is None:
        # This works for Discrete() action spaces.
        action_embed_size = action_space.n

    # Size of the output of the preprocessor selected for the real_obs
    # space; this will be an integer.
    real_obs = obs_space.original_space['real_obs']
    preprocessor = get_preprocessor(real_obs)(real_obs)
    true_obs_shape = preprocessor.size

    flat_obs_space = Box(-1, 1, shape=(true_obs_shape, ))
    self.action_embed_model = FullyConnectedNetwork(
        obs_space=flat_obs_space,
        action_space=action_space,
        num_outputs=action_embed_size,
        model_config=model_config,
        name=name + "_action_embed")

    # Expose the embedding network's base model on this object as well.
    self.base_model = self.action_embed_model.base_model
    self.register_variables(self.action_embed_model.variables())
def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
    """Build a fully connected network over the ``"board"`` observation.

    Args:
        obs_space: Observation space whose ``original_space`` provides a
            ``"board"`` entry.
        action_space: Forwarded to the parent and the network.
        num_outputs: Output size of the network.
        model_config: Forwarded to the parent and the network.
        name: Base name; ``"fc"`` is appended for the network.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super(HanabiFullyConnected, self).__init__(
        obs_space, action_space, num_outputs, model_config, name, **kwargs)

    # Only the "board" component of the original space feeds the network.
    board_space = obs_space.original_space["board"]
    self.fc = FullyConnectedNetwork(
        board_space, action_space, num_outputs, model_config, name + "fc")

    fc_variables = self.fc.variables()
    self.register_variables(fc_variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Wrap a FullyConnectedNetwork that is always named ``"fcnet"``.

    The wrapped network is built from ``self.obs_space`` and
    ``self.action_space`` (read after the parent constructor has run)
    together with the ``num_outputs`` and ``model_config`` arguments.
    """
    super().__init__(obs_space, action_space, num_outputs, model_config, name)

    inner_network = FullyConnectedNetwork(
        self.obs_space,
        self.action_space,
        num_outputs,
        model_config,
        name="fcnet")
    self.fcnet = inner_network
    self.register_variables(inner_network.variables())
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Create an action network over a fixed bounded box and a value network.

    The action network's input space is a ``Box`` whose bounds repeat the
    triple low ``(0, 0, 20)`` / high ``(650, 2, 40)`` twelve times. The
    value network takes ``obs_space`` and has a single output.

    Args:
        obs_space: Observation space used by the parent and value network.
        action_space: Forwarded to the parent and both networks.
        num_outputs: Output size of the action network.
        model_config: Model configuration dict shared by both networks.
        name: Base name; ``"_action"`` / ``"_vf"`` suffixes are appended.
    """
    super(CentralizedCriticModel, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    # The per-entry bounds triple is tiled observation_length times,
    # giving 3 * 12 = 36 dimensions.
    # NOTE(review): the previous comment here read "one-hot encoded
    # Discrete(6)", which does not match these bounds -- confirm intent.
    observation_length = 12
    lower_bounds = np.array([0, 0, 20] * observation_length)
    upper_bounds = np.array([650, 2, 40] * observation_length)

    self.action_model = FullyConnectedNetwork(
        Box(lower_bounds, upper_bounds),
        action_space,
        num_outputs,
        model_config,
        name + "_action")
    self.register_variables(self.action_model.variables())

    # Value network with a single scalar output.
    self.value_model = FullyConnectedNetwork(
        obs_space, action_space, 1, model_config, name + "_vf")
    self.register_variables(self.value_model.variables())
def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
    """Build an MLP over a flattened version of the ``'board'`` space.

    Args:
        obs_space: Observation space whose ``original_space`` provides a
            ``'board'`` entry with ``low``, ``high`` and ``shape``.
        action_space: Forwarded to the parent and the MLP.
        num_outputs: Output size of the MLP.
        model_config: Forwarded to the parent and the MLP.
        name: Name given to the parent model and the MLP.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(
        obs_space, action_space, num_outputs, model_config, name, **kwargs)

    # The DictFlatteningPreprocessor combines all observation components,
    # so the MLP is given a flattened game-board space instead.
    board_space = obs_space.original_space['board']

    # Scalar bounds: the smallest low and the largest high of the board;
    # one dimension whose length is the product of the board's shape.
    lowest = np.min(board_space.low)
    highest = np.max(board_space.high)
    flat_shape = (np.prod(board_space.shape), )
    flat_obs_space = spaces.Box(low=lowest, high=highest, shape=flat_shape)

    self.mlp = FullyConnectedNetwork(
        flat_obs_space, action_space, num_outputs, model_config, name)
    self.register_variables(self.mlp.variables())
def __init__(self,
             obs_space,
             action_space,
             num_outputs,
             model_config,
             name,
             true_obs_shape=(4, ),
             action_embed_size=2,
             **kw):
    """Build the action-embedding network.

    Args:
        obs_space: Forwarded to the parent constructor.
        action_space: Forwarded to the parent and the embedding network.
        num_outputs: Forwarded to the parent constructor.
        model_config: Forwarded to the parent and the embedding network.
        name: Base name; ``"_action_embed"`` is appended for the network.
        true_obs_shape: Shape of the ``Box(-1, 1)`` input space given to
            the embedding network.
        action_embed_size: Output size of the embedding network.
        **kw: Forwarded unchanged to the parent constructor.
    """
    super(ParametricActionsModel, self).__init__(
        obs_space, action_space, num_outputs, model_config, name, **kw)

    embed_input_space = Box(-1, 1, shape=true_obs_shape)
    embed_network = FullyConnectedNetwork(
        embed_input_space,
        action_space,
        action_embed_size,
        model_config,
        name + "_action_embed")

    self.action_embed_model = embed_network
    self.register_variables(embed_network.variables())
def __init__(self,
             obs_space,
             action_space,
             num_outputs,
             model_config,
             name,
             true_obs_shape=(51, 3),
             action_embed_size=50,
             *args,
             **kwargs):
    """Build the action-embedding network.

    Args:
        obs_space: Forwarded to the parent constructor.
        action_space: Forwarded to the parent and the embedding network.
        num_outputs: Forwarded to the parent constructor.
        model_config: Forwarded to the parent and the embedding network.
        name: Base name; ``"_action_embedding"`` is appended for the network.
        true_obs_shape: Shape of the ``Box(0, 1)`` input space given to the
            embedding network.
        action_embed_size: Output size of the embedding network.
        *args: Forwarded unchanged to the parent constructor.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super(VMActionMaskModel, self).__init__(
        obs_space, action_space, num_outputs, model_config, name,
        *args, **kwargs)

    embed_input_space = spaces.Box(0, 1, shape=true_obs_shape)
    embed_network = FullyConnectedNetwork(
        embed_input_space,
        action_space,
        action_embed_size,
        model_config,
        name + "_action_embedding")

    self.action_embed_model = embed_network
    self.register_variables(embed_network.variables())
def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
):
    """Build the action-embedding network over the ``'array_obs'`` component.

    Args:
        obs_space: Observation space; ``original_space.spaces`` must
            contain an ``'array_obs'`` entry.
        action_space: Forwarded to the parent and the embedding network.
        num_outputs: Output size of the embedding network.
        model_config: Forwarded to the parent and the embedding network.
        name: Ignored: the value is replaced by ``"Pa_model"`` before use.
    """
    # The caller-supplied name is discarded in favour of a fixed one.
    name = "Pa_model"
    super(ParametricActionsModel, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    # Keep only the real observation component, discarding the action mask.
    component_spaces = obs_space.original_space.spaces
    real_obs_space = component_spaces['array_obs']

    # Action-embedding network over the real observation.
    embed_name = name + "_action_embed"
    self.action_embed_model = FullyConnectedNetwork(
        real_obs_space, action_space, num_outputs, model_config, embed_name)
    self.register_variables(self.action_embed_model.variables())
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Create the base network and a three-input central value function.

    The central value function is a Keras model with inputs ``obs``
    (width 6), ``opp_obs`` (width 6) and ``opp_act`` (width 2). They are
    concatenated along axis 1 and passed through a 16-unit tanh layer and
    a single linear output unit.

    Args:
        obs_space: Forwarded to the parent and the base network.
        action_space: Forwarded to the parent and the base network.
        num_outputs: Output size of the base network.
        model_config: Forwarded to the parent and the base network.
        name: Name given to the parent model and the base network.
    """
    super(CentralizedCriticModel, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    # Base of the model.
    self.model = FullyConnectedNetwork(
        obs_space, action_space, num_outputs, model_config, name)
    self.register_variables(self.model.variables())

    # Central VF maps (obs, opp_obs, opp_act) -> vf_pred.
    keras_layers = tf.keras.layers
    own_obs_in = keras_layers.Input(shape=(6, ), name="obs")
    opp_obs_in = keras_layers.Input(shape=(6, ), name="opp_obs")
    opp_act_in = keras_layers.Input(shape=(2, ), name="opp_act")
    vf_inputs = [own_obs_in, opp_obs_in, opp_act_in]

    joined = keras_layers.Concatenate(axis=1)(vf_inputs)
    hidden = keras_layers.Dense(
        16, activation=tf.nn.tanh, name="c_vf_dense")(joined)
    vf_out = keras_layers.Dense(
        1, activation=None, name="c_vf_out")(hidden)

    self.central_vf = tf.keras.Model(
        inputs=[own_obs_in, opp_obs_in, opp_act_in], outputs=vf_out)
    self.register_variables(self.central_vf.variables)
def __init__(self, observation_space, action_space, num_outputs, model_config, name):
    """Build a Keras model that routes the network output through ``forward_eager``.

    A FullyConnectedNetwork named ``"fc1"`` is built from the attributes
    ``self.obs_space``, ``self.action_space``, ``self.num_outputs`` and
    ``self.model_config`` (read after the parent constructor has run). Its
    first output is passed through a ``Lambda`` layer that calls
    ``self.forward_eager`` via ``tf.py_function``; the second output is
    left untouched.

    Args:
        observation_space: Observation space; its ``shape`` defines the
            Keras input layer.
        action_space: Forwarded to the parent constructor.
        num_outputs: Forwarded to the parent constructor.
        model_config: Forwarded to the parent constructor.
        name: Forwarded to the parent constructor.
    """
    super().__init__(
        observation_space, action_space, num_outputs, model_config, name)

    inputs = tf.keras.layers.Input(shape=observation_space.shape)
    self.fcnet = FullyConnectedNetwork(
        obs_space=self.obs_space,
        action_space=self.action_space,
        num_outputs=self.num_outputs,
        model_config=self.model_config,
        name="fc1")
    fc_out, value_out = self.fcnet.base_model(inputs)

    def lambda_(x):
        # Call forward_eager through py_function, producing float32.
        eager_out = tf.py_function(self.forward_eager, [x], tf.float32)
        with tf.control_dependencies([eager_out]):
            # Give the py_function result the same static shape as its
            # input.
            eager_out.set_shape(x.shape)
            return eager_out

    eager_layer = tf.keras.layers.Lambda(lambda_)
    wrapped_out = eager_layer(fc_out)

    self.base_model = tf.keras.models.Model(inputs, [wrapped_out, value_out])
    self.register_variables(self.base_model.variables)