def __init__(self, state_dim, action_dim, num_options,
             phi_body=None, actor_body=None, critic_body=None,
             option_body_fn=None, device='cpu'):
    '''
    Build the option actor-critic network.

    Components created here:
        phi_body:    shared body applied to the raw state (defaults to DummyBody(state_dim))
        actor_body:  body feeding fc_pi_o (defaults to DummyBody(phi_body.latent_dim))
        critic_body: body feeding fc_q_o (defaults to DummyBody(phi_body.latent_dim))
        options:     ModuleList of num_options SingleOptionNet instances
        fc_pi_o:     linear layer, actor_body.latent_dim -> num_options
        fc_q_o:      linear layer, critic_body.latent_dim -> num_options

    Args:
        state_dim: size of the state input, used only for the default phi_body.
        action_dim: action size handed to every SingleOptionNet.
        num_options: number of options (and width of fc_pi_o / fc_q_o).
        phi_body, actor_body, critic_body: optional bodies exposing `latent_dim`.
        option_body_fn: zero-argument factory returning a fresh body for each
            option head; when None, a DummyBody(phi_body.latent_dim) factory is used.
        device: device the whole module is moved to.
    '''
    super(OptionGaussianActorCriticNet, self).__init__()
    if phi_body is None:
        phi_body = DummyBody(state_dim)
    if critic_body is None:
        critic_body = DummyBody(phi_body.latent_dim)
    if actor_body is None:
        actor_body = DummyBody(phi_body.latent_dim)
    if option_body_fn is None:
        # SingleOptionNet calls body_fn() unconditionally, so leaving this as
        # None would fail with "'NoneType' object is not callable".
        def option_body_fn():
            return DummyBody(phi_body.latent_dim)

    self.phi_body = phi_body
    self.actor_body = actor_body
    self.critic_body = critic_body

    # One SingleOptionNet per option, each built from its own body instances.
    self.options = nn.ModuleList(
        [SingleOptionNet(action_dim, option_body_fn) for _ in range(num_options)])

    # NOTE(review): the 1e-3 argument to layer_init is presumably a weight
    # scale for the output layers -- confirm against layer_init.
    self.fc_pi_o = layer_init(nn.Linear(actor_body.latent_dim, num_options), 1e-3)
    self.fc_q_o = layer_init(nn.Linear(critic_body.latent_dim, num_options), 1e-3)

    self.num_options = num_options
    self.action_dim = action_dim
    self.device = device
    self.to(device)
def __init__(self, action_dim, body_fn):
    '''
    Build the two heads of a single option.

    Args:
        action_dim: output width of fc_pi and of the `std` parameter.
        body_fn: zero-argument factory; called twice so that the pi head and
            the beta head each receive their own body instance. The returned
            bodies must expose `latent_dim`.
    '''
    super(SingleOptionNet, self).__init__()

    # Separate bodies for the two heads (two independent body_fn() calls).
    pi_trunk = body_fn()
    beta_trunk = body_fn()
    self.pi_body = pi_trunk
    self.beta_body = beta_trunk

    # pi head: latent features -> action_dim outputs.
    pi_linear = nn.Linear(pi_trunk.latent_dim, action_dim)
    self.fc_pi = layer_init(pi_linear, 1e-3)

    # beta head: latent features -> a single output.
    beta_linear = nn.Linear(beta_trunk.latent_dim, 1)
    self.fc_beta = layer_init(beta_linear, 1e-3)

    # Learnable (1, action_dim) parameter initialised to zeros.
    # NOTE(review): presumably a state-independent std / log-std -- confirm in forward().
    std_init = torch.zeros((1, action_dim))
    self.std = nn.Parameter(std_init)
def __init__(self, body, action_dim, num_options, device='cpu'):
    '''
    Build an option-critic network with three linear heads on a shared body.

    Args:
        body: feature body exposing `latent_dim`; stored as `self.body`.
        action_dim: number of action outputs per option.
        num_options: number of options.
        device: device the whole module is moved to.

    Heads (all fed by body.latent_dim features):
        fc_q:    num_options outputs
        fc_pi:   num_options * action_dim outputs
        fc_beta: num_options outputs
    '''
    super(OptionCriticNet, self).__init__()
    self.num_options = num_options
    self.action_dim = action_dim
    self.device = device

    feature_dim = body.latent_dim
    q_head = nn.Linear(feature_dim, num_options)
    self.fc_q = layer_init(q_head)
    pi_head = nn.Linear(feature_dim, num_options * action_dim)
    self.fc_pi = layer_init(pi_head)
    beta_head = nn.Linear(feature_dim, num_options)
    self.fc_beta = layer_init(beta_head)

    # The body is registered after the heads, as in the original ordering.
    self.body = body
    self.to(device)
def __init__(self, state_dim, hidden_units=(64, 64), gate=F.relu):
    '''
    Build a stack of fully connected layers.

    Args:
        state_dim: input feature size of the first layer.
        hidden_units: iterable of layer widths (tuple or list); one
            nn.Linear is created per entry.
        gate: callable stored as `self.gate`.
            NOTE(review): presumably applied after each layer in forward() -- confirm.

    Attributes:
        layers: ModuleList of the linear layers, each passed through layer_init.
        latent_dim: width of the last layer (equals state_dim when
            hidden_units is empty).
    '''
    super(FCBody, self).__init__()
    # hidden_units defaults to a tuple; list + tuple raises TypeError, so
    # normalise to a list before concatenating.
    dims = [state_dim] + list(hidden_units)
    self.layers = nn.ModuleList([
        layer_init(nn.Linear(dim_in, dim_out))
        for dim_in, dim_out in zip(dims[:-1], dims[1:])
    ])
    self.gate = gate
    self.latent_dim = dims[-1]
def __init__(self, state_dim, action_dim, num_options,
             phi_body=None, critic_body=None, option_body_fn=None,
             device='cpu'):
    '''
    Option-critic network for continuous action spaces.

    Layout:
        phi_body:    encoder; observations pass through it before reaching
                     the option networks and the critic.
        critic_body: body feeding the option-value head.
        options:     list of SingleOptionNet, one per option, providing the
                     lower-level policy and the termination output.
        fc_q_o:      linear head producing num_options values from the
                     critic_body features.

    Any body left as None is replaced by a DummyBody of the matching size;
    a missing option_body_fn becomes a factory of DummyBody(phi_body.latent_dim).
    '''
    super(OptionGaussianActorCriticNet, self).__init__()

    # Fill in defaults for the components the caller did not provide.
    phi_body = DummyBody(state_dim) if phi_body is None else phi_body
    critic_body = (DummyBody(phi_body.latent_dim)
                   if critic_body is None else critic_body)
    if option_body_fn is None:
        def option_body_fn():
            return DummyBody(phi_body.latent_dim)

    self.phi_body = phi_body
    self.critic_body = critic_body

    # Option networks: each one builds its own bodies via option_body_fn.
    option_nets = [SingleOptionNet(action_dim, option_body_fn)
                   for _ in range(num_options)]
    self.options = nn.ModuleList(option_nets)

    # Linear output head over options.
    q_head = nn.Linear(critic_body.latent_dim, num_options)
    self.fc_q_o = layer_init(q_head, 1e-3)

    self.num_options = num_options
    self.action_dim = action_dim
    self.device = device
    self.to(device)