コード例 #1
0
    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs):
        """Assemble the observation, Q, auxiliary and auxiliary-head sub-networks.

        The ``num_outputs`` argument is accepted but not used: the parent
        constructor receives the last entry of
        ``custom_options["q_module_hiddens"]`` in its place.

        Args:
            obs_space: Observation space; its ``original_space`` is indexed
                with ``"board"`` and ``"hidden_hand"``.
            action_space: Forwarded unchanged to the parent constructor.
            num_outputs: Ignored (see above).
            model_config: Model configuration. Reads ``"fcnet_activation"``
                and, under ``"custom_options"``, ``"obs_module_hiddens"``,
                ``"q_module_hiddens"``, ``"aux_module_hiddens"``,
                ``"aux_head_hiddens"`` and, optionally,
                ``"aux_loss_formula"`` (defaults to ``"sqrt"``).
            name: Prefix used when naming each sub-network.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        options = model_config["custom_options"]
        super(HanabiHandInference, self).__init__(
            obs_space, action_space, options["q_module_hiddens"][-1],
            model_config, name, **kwargs)

        def build_fc(input_space, out_size, hiddens_key, suffix, no_final_linear=True):
            # Every sub-network shares the same activation and only differs
            # in its hidden-layer list and whether a final linear layer is added.
            return FullyConnectedNetwork(
                input_space,
                None,
                out_size,
                {
                    "fcnet_activation": model_config["fcnet_activation"],
                    "fcnet_hiddens": options[hiddens_key],
                    "no_final_linear": no_final_linear,
                    "vf_share_layers": True,
                },
                name + suffix)

        self.obs_module = build_fc(obs_space.original_space["board"],
                                   options["obs_module_hiddens"][-1],
                                   "obs_module_hiddens",
                                   "obs_module")

        # A zero array stands in for the "space" argument of the downstream
        # networks; it is sized like the observation module's last hidden layer.
        obs_features_stub = numpy.zeros(options["obs_module_hiddens"][-1])
        self.q_module = build_fc(obs_features_stub,
                                 options["q_module_hiddens"][-1],
                                 "q_module_hiddens",
                                 "q_module")
        self.aux_module = build_fc(obs_features_stub,
                                   options["aux_module_hiddens"][-1],
                                   "aux_module_hiddens",
                                   "aux_module")

        # The auxiliary head is the only sub-network with a final linear
        # layer; its output size is the flattened "hidden_hand" shape.
        aux_features_stub = numpy.zeros(options["aux_module_hiddens"][-1])
        self.aux_head = build_fc(aux_features_stub,
                                 numpy.prod(obs_space.original_space["hidden_hand"].shape),
                                 "aux_head_hiddens",
                                 "aux_head",
                                 no_final_linear=False)

        for submodel in (self.obs_module, self.q_module, self.aux_module, self.aux_head):
            self.register_variables(submodel.variables())

        self.aux_loss_formula = get_aux_loss_formula(options.get("aux_loss_formula", "sqrt"))
コード例 #2
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the base network and a separate central value-function model.

        Args:
            obs_space: Observation space; ``obs_space.shape[0]`` sets the
                per-agent observation width.
            action_space: Passed to the parent and to the base network.
            num_outputs: Output size of the base network.
            model_config: Model configuration. Reads ``'max_num_agents'`` and
                ``'central_vf_size'`` from ``model_config['custom_options']``.
            name: Name passed to the parent and to the base network.
        """
        super(CentralizedCriticModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name)

        # Base network.
        base_net = FullyConnectedNetwork(obs_space, action_space, num_outputs,
                                         model_config, name)
        self.model = base_net
        self.register_variables(base_net.variables())

        # Central value function: a single "opp_obs" input of width
        # (per-agent obs width * max_num_agents) -> tanh dense -> scalar.
        agent_cap = model_config['custom_options']['max_num_agents']
        per_agent_width = obs_space.shape[0]
        self.max_num_agents = agent_cap
        self.obs_space_shape = per_agent_width

        other_obs = tf.keras.layers.Input(
            shape=(per_agent_width * agent_cap, ), name="opp_obs")
        hidden_layer = tf.keras.layers.Dense(
            model_config['custom_options']['central_vf_size'],
            activation=tf.nn.tanh,
            name="c_vf_dense")
        hidden = hidden_layer(other_obs)
        output_layer = tf.keras.layers.Dense(
            1, activation=None, name="c_vf_out")
        value_out = output_layer(hidden)

        self.central_vf = tf.keras.Model(inputs=[other_obs], outputs=value_out)
        self.register_variables(self.central_vf.variables)
コード例 #3
0
ファイル: custom_env.py プロジェクト: yc-jang/ray
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name):
     """Wrap a single FullyConnectedNetwork built from the given arguments.

     All five arguments are forwarded unchanged both to the parent
     constructor and to the wrapped network, whose variables are then
     registered on this model.
     """
     super(CustomModel, self).__init__(
         obs_space, action_space, num_outputs, model_config, name)
     wrapped = FullyConnectedNetwork(
         obs_space, action_space, num_outputs, model_config, name)
     self.model = wrapped
     self.register_variables(wrapped.variables())
コード例 #4
0
 def _build_layers_v2(self, input_dict, num_outputs, options):
     """Build a FullyConnectedNetwork inside the reusable "shared" scope.

     Stores ``input_dict["obs"]`` on ``self.obs_in`` and the network on
     ``self.fcnet``.

     Returns:
         A ``(outputs, last_layer)`` pair taken from the network.
     """
     self.obs_in = input_dict["obs"]
     # AUTO_REUSE lets repeated builds under the "shared" scope reuse
     # existing variables instead of creating new ones.
     shared_scope = tf.variable_scope("shared", reuse=tf.AUTO_REUSE)
     with shared_scope:
         network = FullyConnectedNetwork(
             input_dict, self.obs_space, self.action_space, num_outputs,
             options)
         self.fcnet = network
     return network.outputs, network.last_layer
コード例 #5
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Create two independent networks: one for actions, one for values.

        The action network reads a 6-wide ``Box`` in ``[0, 1]`` and emits
        ``num_outputs`` values; the value network reads ``obs_space`` and
        emits a single value. Both networks' variables are registered.
        """
        super(CentralizedCriticModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name)

        # Input for the action network: a one-hot encoded Discrete(6).
        one_hot_space = Box(low=0, high=1, shape=(6, ))
        action_net = FullyConnectedNetwork(
            one_hot_space, action_space, num_outputs, model_config,
            name + "_action")
        self.action_model = action_net
        self.register_variables(action_net.variables())

        value_net = FullyConnectedNetwork(
            obs_space, action_space, 1, model_config, name + "_vf")
        self.value_model = value_net
        self.register_variables(value_net.variables())
コード例 #6
0
 def __init__(self,
              obs_space,
              action_space,
              num_outputs,
              model_config,
              name,
              true_obs_shape=(4, ),
              action_embed_size=6,
              **kw):
     """Build the action-embedding network from ``custom_options``.

     The ``true_obs_shape`` argument is not used, and the
     ``action_embed_size`` argument is always overwritten: the embedding
     size is 6 when ``custom_options['extended']`` is truthy and 4 otherwise.

     Args:
         obs_space: Forwarded to the parent constructor.
         action_space: Forwarded to the parent and the embedding network.
         num_outputs: Forwarded to the parent constructor.
         model_config: Model configuration. Reads ``'spy'``, ``'parties'``,
             ``'blocks'`` and ``'extended'`` under ``'custom_options'``.
         name: Model name; the embedding network gets ``name + "_action_embed"``.
         true_obs_shape: Unused.
         action_embed_size: Overwritten (see above).
         **kw: Forwarded to the parent constructor.
     """
     super(ParametricActionsModel, self).__init__(
         obs_space, action_space, num_outputs, model_config, name, **kw)
     options = model_config['custom_options']

     # Pick the space factory from the 'spy' flag; both take the same arguments.
     space_factory = make_spy_space if options['spy'] else make_blind_space
     true_obs_space = space_factory(options['parties'], options['blocks'])

     action_embed_size = 6 if options['extended'] else 4

     # Input width is the sum of the preprocessed sizes of every sub-space.
     total_dim = sum(
         get_preprocessor(space)(space).size for space in true_obs_space)

     embed_net = FullyConnectedNetwork(
         Box(-1, 1, shape=(total_dim,)), action_space, action_embed_size,
         model_config, name + "_action_embed")
     self.action_embed_model = embed_net
     self.register_variables(embed_net.variables())
コード例 #7
0
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 true_obs_shape=(24, ),
                 action_embed_size=None):
        """Build the action-embedding network over the ``'real_obs'`` sub-space.

        The ``true_obs_shape`` argument is always overwritten with the
        preprocessed size of ``obs_space.original_space['real_obs']``.

        Args:
            obs_space: Observation space whose ``original_space`` is indexed
                with ``'real_obs'``.
            action_space: Action space; ``action_space.n`` is the embedding
                size when ``action_embed_size`` is ``None``.
            num_outputs: Forwarded to the parent constructor.
            model_config: Forwarded to the parent and the embedding network.
            name: Model name; the embedding network gets ``name + "_action_embed"``.
            true_obs_shape: Overwritten (see above).
            action_embed_size: Output size of the embedding network, or
                ``None`` to use ``action_space.n``.
        """
        super(ParametricActionsModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name)

        # Falling back to action_space.n works for Discrete() action spaces.
        if action_embed_size is None:
            action_embed_size = action_space.n

        # Use the size produced by the preprocessor chosen for the real_obs
        # space; this is an integer.
        real_obs = obs_space.original_space['real_obs']
        preprocessor = get_preprocessor(real_obs)(real_obs)
        true_obs_shape = preprocessor.size

        embed_input_space = Box(-1, 1, shape=(true_obs_shape, ))
        self.action_embed_model = FullyConnectedNetwork(
            obs_space=embed_input_space,
            action_space=action_space,
            num_outputs=action_embed_size,
            model_config=model_config,
            name=name + "_action_embed")
        self.base_model = self.action_embed_model.base_model
        self.register_variables(self.action_embed_model.variables())
コード例 #8
0
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name, **kwargs):
     """Build a fully connected network over the ``"board"`` sub-space.

     The network is named ``name + "fc"`` and its variables are registered
     on this model. ``**kwargs`` goes to the parent constructor only.
     """
     super(HanabiFullyConnected, self).__init__(
         obs_space, action_space, num_outputs, model_config, name, **kwargs)
     board_space = obs_space.original_space["board"]
     network = FullyConnectedNetwork(
         board_space, action_space, num_outputs, model_config, name + "fc")
     self.fc = network
     self.register_variables(network.variables())
コード例 #9
0
ファイル: custom_loss_model.py プロジェクト: AmeerHajAli/ray2
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Initialise the parent, then wrap a network named ``"fcnet"``.

        The wrapped network is built from ``self.obs_space`` and
        ``self.action_space`` as they stand after the parent constructor
        has run; its variables are registered on this model.
        """
        super().__init__(
            obs_space, action_space, num_outputs, model_config, name)

        network = FullyConnectedNetwork(
            self.obs_space, self.action_space, num_outputs, model_config,
            name="fcnet")
        self.fcnet = network
        self.register_variables(network.variables())
コード例 #10
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Create an action network over a bounded Box and a value network.

        The action network's input is a ``Box`` whose bounds repeat the
        triple low ``[0, 0, 20]`` / high ``[650, 2, 40]`` twelve times
        (36 entries in total). The value network reads ``obs_space`` and
        emits a single value. Both networks' variables are registered.
        """
        super(CentralizedCriticModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name)

        observation_length = 12
        lower_bounds = np.array([0, 0, 20] * observation_length)
        upper_bounds = np.array([650, 2, 40] * observation_length)
        # Bounded input space for the action network, one (low, high) pair
        # per entry of the repeated triple.
        action_input_space = Box(lower_bounds, upper_bounds)

        action_net = FullyConnectedNetwork(
            action_input_space, action_space, num_outputs, model_config,
            name + "_action")
        self.action_model = action_net
        self.register_variables(action_net.variables())

        value_net = FullyConnectedNetwork(
            obs_space, action_space, 1, model_config, name + "_vf")
        self.value_model = value_net
        self.register_variables(value_net.variables())
コード例 #11
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kwargs):
        """Build an MLP over a flattened version of the ``'board'`` sub-space.

        The flattened space is a 1-D ``Box`` whose scalar bounds are the
        minimum of the board's ``low`` and the maximum of its ``high``, and
        whose length is the product of the board's shape.
        """
        super().__init__(
            obs_space, action_space, num_outputs, model_config, name,
            **kwargs)

        # The MLP expects a flat vector, so collapse the board space to one
        # dimension with uniform bounds.
        board_space = obs_space.original_space['board']
        lowest = np.min(board_space.low)
        highest = np.max(board_space.high)
        flat_length = np.prod(board_space.shape)
        flat_obs_space = spaces.Box(
            low=lowest, high=highest, shape=(flat_length, ))

        network = FullyConnectedNetwork(
            flat_obs_space, action_space, num_outputs, model_config, name)
        self.mlp = network
        self.register_variables(network.variables())
コード例 #12
0
 def __init__(self,
              obs_space,
              action_space,
              num_outputs,
              model_config,
              name,
              true_obs_shape=(4, ),
              action_embed_size=2,
              **kw):
     """Build the action-embedding network.

     Args:
         obs_space: Forwarded to the parent constructor.
         action_space: Forwarded to the parent and the embedding network.
         num_outputs: Forwarded to the parent constructor.
         model_config: Forwarded to the parent and the embedding network.
         name: Model name; the embedding network gets ``name + "_action_embed"``.
         true_obs_shape: Shape of the ``Box(-1, 1)`` input space of the
             embedding network.
         action_embed_size: Output size of the embedding network.
         **kw: Forwarded to the parent constructor.
     """
     super(ParametricActionsModel, self).__init__(
         obs_space, action_space, num_outputs, model_config, name, **kw)
     embed_input_space = Box(-1, 1, shape=true_obs_shape)
     embed_net = FullyConnectedNetwork(
         embed_input_space, action_space, action_embed_size, model_config,
         name + "_action_embed")
     self.action_embed_model = embed_net
     self.register_variables(embed_net.variables())
コード例 #13
0
ファイル: rl_utils.py プロジェクト: grossmann-group/or-gym
 def __init__(self,
              obs_space,
              action_space,
              num_outputs,
              model_config,
              name,
              true_obs_shape=(51, 3),
              action_embed_size=50,
              *args,
              **kwargs):
     """Build the action-embedding network.

     Args:
         obs_space: Forwarded to the parent constructor.
         action_space: Forwarded to the parent and the embedding network.
         num_outputs: Forwarded to the parent constructor.
         model_config: Forwarded to the parent and the embedding network.
         name: Model name; the embedding network gets
             ``name + "_action_embedding"``.
         true_obs_shape: Shape of the ``Box(0, 1)`` input space of the
             embedding network.
         action_embed_size: Output size of the embedding network.
         *args: Forwarded to the parent constructor.
         **kwargs: Forwarded to the parent constructor.
     """
     super(VMActionMaskModel, self).__init__(
         obs_space, action_space, num_outputs, model_config, name,
         *args, **kwargs)
     embed_input_space = spaces.Box(0, 1, shape=true_obs_shape)
     embed_net = FullyConnectedNetwork(
         embed_input_space, action_space, action_embed_size, model_config,
         name + "_action_embedding")
     self.action_embed_model = embed_net
     self.register_variables(embed_net.variables())
コード例 #14
0
ファイル: PaModel.py プロジェクト: nicofirst1/rl_werewolf
    def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
    ):
        """Build the action-embedding network over the ``'array_obs'`` sub-space.

        The ``name`` argument is discarded: the model is always named
        ``"Pa_model"`` and the embedding network ``"Pa_model_action_embed"``.
        """
        # The caller-supplied name is replaced by a fixed one.
        name = "Pa_model"
        super(ParametricActionsModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name)

        # Use only the real observation component of the original space.
        real_obs_space = obs_space.original_space.spaces['array_obs']

        embed_net = FullyConnectedNetwork(
            real_obs_space, action_space, num_outputs, model_config,
            name + "_action_embed")
        self.action_embed_model = embed_net
        self.register_variables(embed_net.variables())
コード例 #15
0
ファイル: centralized_critic.py プロジェクト: wwxFromTju/ray
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Build the base network and a three-input central value function.

        The central value function concatenates ``obs`` (6 wide),
        ``opp_obs`` (6 wide) and ``opp_act`` (2 wide), passes the result
        through a 16-unit tanh layer and emits a single value. Variables of
        both models are registered on this model.
        """
        super(CentralizedCriticModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name)

        # Base network.
        base_net = FullyConnectedNetwork(
            obs_space, action_space, num_outputs, model_config, name)
        self.model = base_net
        self.register_variables(base_net.variables())

        # Central value function: (obs, opp_obs, opp_act) -> vf_pred.
        layers = tf.keras.layers
        obs = layers.Input(shape=(6, ), name="obs")
        opp_obs = layers.Input(shape=(6, ), name="opp_obs")
        opp_act = layers.Input(shape=(2, ), name="opp_act")
        vf_inputs = [obs, opp_obs, opp_act]

        joined = layers.Concatenate(axis=1)(vf_inputs)
        hidden = layers.Dense(
            16, activation=tf.nn.tanh, name="c_vf_dense")(joined)
        vf_pred = layers.Dense(
            1, activation=None, name="c_vf_out")(hidden)

        self.central_vf = tf.keras.Model(inputs=vf_inputs, outputs=vf_pred)
        self.register_variables(self.central_vf.variables)
コード例 #16
0
ファイル: eager_execution.py プロジェクト: yongjun823/ray
    def __init__(self, observation_space, action_space, num_outputs,
                 model_config, name):
        """Build a Keras model whose first output is routed through eager code.

        A ``FullyConnectedNetwork`` named ``"fc1"`` is created and its
        ``base_model`` is applied to a fresh input layer. The first of the
        two resulting tensors is then passed through ``self.forward_eager``
        via ``tf.py_function``; the second is used unchanged. The combined
        model is stored on ``self.base_model`` and its variables registered.
        """
        super().__init__(observation_space, action_space, num_outputs,
                         model_config, name)

        inputs = tf.keras.layers.Input(shape=observation_space.shape)
        self.fcnet = FullyConnectedNetwork(obs_space=self.obs_space,
                                           action_space=self.action_space,
                                           num_outputs=self.num_outputs,
                                           model_config=self.model_config,
                                           name="fc1")
        # base_model yields two tensors: the network output and the value output.
        out, value_out = self.fcnet.base_model(inputs)

        def lambda_(x):
            # Run self.forward_eager on x as a py_function op with a float32 result.
            eager_out = tf.py_function(self.forward_eager, [x], tf.float32)
            with tf.control_dependencies([eager_out]):
                # Give the result the same static shape as the input tensor.
                eager_out.set_shape(x.shape)
                return eager_out

        out = tf.keras.layers.Lambda(lambda_)(out)
        self.base_model = tf.keras.models.Model(inputs, [out, value_out])
        self.register_variables(self.base_model.variables)