Ejemplo n.º 1
0
    def __init__(self,
                 state_dim,
                 action_dim,
                 num_options,
                 phi_body=None,
                 actor_body=None,
                 critic_body=None,
                 option_body_fn=None,
                 device='cpu'):
        """Assemble the shared bodies, the per-option sub-networks and the option-level heads.

        Args:
            state_dim: Size passed to the fallback ``DummyBody`` used as
                ``phi_body`` when none is supplied.
            action_dim: Forwarded to every ``SingleOptionNet`` and stored on
                the instance.
            num_options: Number of ``SingleOptionNet`` instances to create and
                the output width of both ``fc_pi_o`` and ``fc_q_o``.
            phi_body: Optional body module exposing ``latent_dim``; defaults
                to ``DummyBody(state_dim)``.
            actor_body: Optional body module exposing ``latent_dim``; defaults
                to ``DummyBody(phi_body.latent_dim)``.
            critic_body: Optional body module exposing ``latent_dim``;
                defaults to ``DummyBody(phi_body.latent_dim)``.
            option_body_fn: Optional zero-argument callable handed to each
                ``SingleOptionNet``; defaults to a factory returning
                ``DummyBody(phi_body.latent_dim)``.
            device: Target passed to ``self.to`` and stored as ``self.device``.
        """
        super(OptionGaussianActorCriticNet, self).__init__()
        if phi_body is None:
            phi_body = DummyBody(state_dim)
        if critic_body is None:
            critic_body = DummyBody(phi_body.latent_dim)
        if actor_body is None:
            actor_body = DummyBody(phi_body.latent_dim)
        if option_body_fn is None:
            # SingleOptionNet invokes the factory, so passing None through
            # would fail with "'NoneType' object is not callable".
            option_body_fn = lambda: DummyBody(phi_body.latent_dim)

        self.phi_body = phi_body
        self.actor_body = actor_body
        self.critic_body = critic_body

        # One sub-network per option, each built from the same body factory.
        self.options = nn.ModuleList([SingleOptionNet(action_dim, option_body_fn) for _ in range(num_options)])

        # Option-level linear heads with one output per option; the 1e-3 is
        # layer_init's second argument (presumably a weight scale -- confirm
        # against layer_init).
        self.fc_pi_o = layer_init(nn.Linear(actor_body.latent_dim, num_options), 1e-3)
        self.fc_q_o = layer_init(nn.Linear(critic_body.latent_dim, num_options), 1e-3)

        self.num_options = num_options
        self.action_dim = action_dim
        self.device = device
        self.to(device)
Ejemplo n.º 2
0
 def __init__(self, action_dim, body_fn):
     """Create the two bodies and output heads of a single option.

     Args:
         action_dim: Output width of ``fc_pi`` and number of columns of the
             ``std`` parameter.
         body_fn: Zero-argument callable returning a body module that
             exposes ``latent_dim``; it is called twice, once for each body.
     """
     super(SingleOptionNet, self).__init__()

     # Two independent bodies obtained from the same factory.
     self.pi_body = body_fn()
     self.beta_body = body_fn()

     # Each linear layer is created and initialised before the next one so
     # the construction order stays the same as before.
     pi_head = nn.Linear(self.pi_body.latent_dim, action_dim)
     self.fc_pi = layer_init(pi_head, 1e-3)

     beta_head = nn.Linear(self.beta_body.latent_dim, 1)
     self.fc_beta = layer_init(beta_head, 1e-3)

     # Learnable tensor of shape (1, action_dim), initialised to zeros.
     initial_std = torch.zeros((1, action_dim))
     self.std = nn.Parameter(initial_std)
Ejemplo n.º 3
0
 def __init__(self, body, action_dim, num_options, device='cpu'):
     """Attach three linear heads to ``body`` and move the module to ``device``.

     Args:
         body: Module exposing ``latent_dim``; stored as ``self.body``.
         action_dim: Together with ``num_options`` sets the output width of
             ``fc_pi`` (``num_options * action_dim``).
         num_options: Output width of ``fc_q`` and ``fc_beta``.
         device: Target passed to ``self.to`` and stored as ``self.device``.
     """
     super(OptionCriticNet, self).__init__()

     # Heads are created in the order fc_q, fc_pi, fc_beta and registered
     # before the body, matching the established sub-module order.
     q_head = nn.Linear(body.latent_dim, num_options)
     self.fc_q = layer_init(q_head)

     pi_head = nn.Linear(body.latent_dim, num_options * action_dim)
     self.fc_pi = layer_init(pi_head)

     beta_head = nn.Linear(body.latent_dim, num_options)
     self.fc_beta = layer_init(beta_head)

     self.num_options, self.action_dim = num_options, action_dim
     self.body = body

     self.device = device
     self.to(device)
Ejemplo n.º 4
0
 def __init__(self, state_dim, hidden_units=(64, 64), gate=F.relu):
     """Build a stack of linear layers sized ``state_dim -> hidden_units[0] -> ...``.

     Args:
         state_dim: Input width of the first linear layer.
         hidden_units: Sequence of layer output widths (tuple or list). An
             empty sequence yields no layers and ``latent_dim == state_dim``.
         gate: Callable stored as ``self.gate``; it is not invoked here.
     """
     super(FCBody, self).__init__()
     # Normalise to a list first: concatenating a list with the default
     # tuple raises TypeError ("can only concatenate list (not "tuple")").
     dims = [state_dim] + list(hidden_units)
     # Pair each width with its successor to get (in, out) for every layer.
     self.layers = nn.ModuleList([
         layer_init(nn.Linear(dim_in, dim_out))
         for dim_in, dim_out in zip(dims[:-1], dims[1:])
     ])
     self.gate = gate
     # Width of the last layer's output, exposed for downstream heads.
     self.latent_dim = dims[-1]
Ejemplo n.º 5
0
    def __init__(self,
                 state_dim,
                 action_dim,
                 num_options,
                 phi_body=None,
                 critic_body=None,
                 option_body_fn=None,
                 device='cpu'):
        """Option-critic network for continuous action spaces.

        Components:
            phi_body: Encoder-like body; observations pass through it before
                reaching the policy and critic parts.
            options: ``num_options`` instances of ``SingleOptionNet``. They
                form the lower-level policy that outputs actions, and also
                provide the termination for each option.
            fc_q_o: Linear head on top of ``critic_body`` producing one
                state-option Q value per option.

        Args:
            state_dim: Size passed to the fallback ``DummyBody`` used as
                ``phi_body`` when none is supplied.
            action_dim: Forwarded to every ``SingleOptionNet`` and stored on
                the instance.
            num_options: Number of option sub-networks and output width of
                ``fc_q_o``.
            phi_body: Optional body exposing ``latent_dim``.
            critic_body: Optional body exposing ``latent_dim``; defaults to
                ``DummyBody(phi_body.latent_dim)``.
            option_body_fn: Optional zero-argument factory for option bodies;
                defaults to one returning ``DummyBody(phi_body.latent_dim)``.
            device: Target passed to ``self.to`` and stored as ``self.device``.
        """
        super(OptionGaussianActorCriticNet, self).__init__()

        # Fill in defaults for whichever components the caller left out.
        if phi_body is None:
            phi_body = DummyBody(state_dim)
        if critic_body is None:
            critic_body = DummyBody(phi_body.latent_dim)
        if option_body_fn is None:
            def option_body_fn():
                return DummyBody(phi_body.latent_dim)

        self.phi_body = phi_body
        self.critic_body = critic_body

        # Option sub-networks, all created from the same body factory.
        option_nets = []
        for _ in range(num_options):
            option_nets.append(SingleOptionNet(action_dim, option_body_fn))
        self.options = nn.ModuleList(option_nets)

        # Linear output head over the critic body, one value per option.
        q_head = nn.Linear(critic_body.latent_dim, num_options)
        self.fc_q_o = layer_init(q_head, 1e-3)

        self.num_options, self.action_dim = num_options, action_dim
        self.device = device
        self.to(device)