import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical, Normal

# Project-local helpers (VAE, mlp, Actor, ConvBody, FCBody, DummyBody,
# OptionGaussianActorCriticNet) are assumed to be importable from elsewhere
# in this repo.


class VAECritic(nn.Module):

    def __init__(self, vae_weights_path, obs_dim, conv_layer_sizes,
                 hidden_sizes, activation):
        '''
        A Variational Autoencoder Net for the Critic network
        Args:
            vae_weights_path (Str): Path to the vae weights file
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
            conv_layer_sizes (list): kept for interface compatibility; unused by the VAE variant
            hidden_sizes (list): list of number of neurons in each layer of MLP
            activation (nn.modules.activation): Activation function for each layer of MLP
        '''
        super().__init__()
        self.v_vae = VAE()
        self.v_vae.load_weights(vae_weights_path)
        self.v_mlp = mlp([self.v_vae.latent_dim] + list(hidden_sizes) + [1],
                         activation)

    def forward(self, obs):
        '''
        Forward propagation for critic network
        Args:
            obs (Tensor [n, obs_dim]): batch of observations from the environment
        Return:
            v (Tensor [n]): batch of state values
        '''
        obs = self.v_vae(obs)
        v = self.v_mlp(obs)
        return torch.squeeze(v, -1)  # ensure v has the right shape

    def dataparallel(self, ngpu):
        print(f"Critic network using {ngpu} gpus, gpu id: {list(range(ngpu))}")
        self.v_vae.dataparallel(ngpu)
        self.v_mlp = nn.DataParallel(self.v_mlp, list(range(ngpu)))
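
# The `mlp` builder used throughout this file is not shown here. Below is a
# minimal sketch of the SpinningUp-style helper these classes appear to
# assume; the name and call signature come from the calls above, the body is
# an assumption. Note that with this layout the final nn.Linear sits at
# index -2, which is what the actors' weight-scaling trick relies on.
def mlp(sizes, activation, output_activation=nn.Identity):
    # Alternate Linear layers with activations; the output activation
    # (Identity by default, Tanh for the actors) comes after the last Linear.
    layers = []
    for j in range(len(sizes) - 1):
        act = activation if j < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j + 1]), act()]
    return nn.Sequential(*layers)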
class VAEGaussianActor(Actor):

    def __init__(self, vae_weights_path, obs_dim, act_dim, conv_layer_sizes,
                 hidden_sizes, activation):
        '''
        A Variational Autoencoder Net for the Actor network for continuous outputs
        Network Architecture: (input) -> VAE -> MLP -> (output)
        Assume input is in the shape: (3, 128, 128)
        Args:
            vae_weights_path (Str): Path to the vae weights file
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
            act_dim (int): action dimension of the environment
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from VAE
            activation (nn.modules.activation): Activation function for each layer of MLP
        '''
        super().__init__()
        log_std = -0.5 * np.ones(act_dim, dtype=np.float32)
        self.log_std = torch.nn.Parameter(torch.as_tensor(log_std))
        self.mu_vae = VAE()
        self.mu_vae.load_weights(vae_weights_path)
        mlp_sizes = [self.mu_vae.latent_dim] + list(hidden_sizes) + [act_dim]
        self.mu_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)
        # initialise actor network final layer weights to be 1/100 of other weights;
        # the last module is the output activation (Tanh), so index -2 is the final Linear layer
        self.mu_mlp[-2].weight.data /= 100

    def _distribution(self, obs):
        '''
        Forward propagation for actor network
        Args:
            obs (Tensor [n, obs_dim]): batch of observations from the environment
        Return:
            Normal distribution from output of model
        '''
        obs = self.mu_vae(obs)
        mu = self.mu_mlp(obs)
        std = torch.exp(self.log_std)
        return Normal(mu, std)

    def _log_prob_from_distribution(self, pi, act):
        '''
        Args:
            pi: distribution from _distribution() function
            act: batch of actions taken
        Return:
            log probability of selecting action act from the given distribution pi
        '''
        return pi.log_prob(act).sum(axis=-1)  # last axis sum needed for Torch Normal distribution

    def dataparallel(self, ngpu):
        print(f"Actor network using {ngpu} gpus, gpu id: {list(range(ngpu))}")
        self.mu_vae.dataparallel(ngpu)
        self.mu_mlp = nn.DataParallel(self.mu_mlp, list(range(ngpu)))
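
# --- Usage sketch (not part of the original file) ---
# Assumes a SpinningUp-style `Actor` base class whose forward() calls
# _distribution() and _log_prob_from_distribution(). The weights path and
# dimensions below are hypothetical.
# pi = VAEGaussianActor('vae_weights.pth', (3, 128, 128), act_dim=2,
#                       conv_layer_sizes=None, hidden_sizes=[64, 64],
#                       activation=nn.ReLU)
# obs_batch = torch.zeros(8, 3, 128, 128)
# dist = pi._distribution(obs_batch)                # Normal(mu, std)
# act = dist.sample()                               # shape [8, 2]
# logp = pi._log_prob_from_distribution(dist, act)  # shape [8]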
class VAECategoricalActor(Actor):

    def __init__(self, vae_weights_path, obs_dim, act_dim, hidden_sizes,
                 activation):
        '''
        A Variational Autoencoder Net for the Actor network for discrete outputs
        Network Architecture: (input) -> VAE -> MLP -> (output)
        Assume input is in the shape: (3, 128, 128)
        Args:
            vae_weights_path (Str): Path to the vae weights file
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
            act_dim (int): action dimension of the environment
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from VAE
            activation (nn.modules.activation): Activation function for each layer of MLP
        '''
        super().__init__()
        self.logits_vae = VAE()
        self.logits_vae.load_weights(vae_weights_path)
        mlp_sizes = [self.logits_vae.latent_dim] + list(hidden_sizes) + [act_dim]
        self.logits_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)
        # initialise actor network final layer weights to be 1/100 of other weights;
        # the last module is the output activation (Tanh), so index -2 is the final Linear layer
        self.logits_mlp[-2].weight.data /= 100

    def _distribution(self, obs):
        '''
        Forward propagation for actor network
        Args:
            obs (Tensor [n, obs_dim]): batch of observations from the environment
        Return:
            Categorical distribution from output of model
        '''
        obs = self.logits_vae(obs)
        logits = self.logits_mlp(obs)
        return Categorical(logits=logits)

    def _log_prob_from_distribution(self, pi, act):
        '''
        Args:
            pi: distribution from _distribution() function
            act: batch of actions taken
        Return:
            log probability of selecting action act from the given distribution pi
        '''
        return pi.log_prob(act)

    def dataparallel(self, ngpu):
        print(f"Actor network using {ngpu} gpus, gpu id: {list(range(ngpu))}")
        self.logits_vae.dataparallel(ngpu)
        self.logits_mlp = nn.DataParallel(self.logits_mlp, list(range(ngpu)))
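
# --- Usage sketch (not part of the original file) ---
# Discrete-action counterpart of the Gaussian actor; arguments hypothetical.
# Note Categorical.log_prob() already returns one scalar per sample, so no
# axis sum is needed here (unlike the Normal case above).
# pi = VAECategoricalActor('vae_weights.pth', (3, 128, 128), act_dim=4,
#                          hidden_sizes=[64, 64], activation=nn.ReLU)
# dist = pi._distribution(obs_batch)  # Categorical over 4 actions
# act = dist.sample()                 # integer action indices, shape [n]
# logp = pi._log_prob_from_distribution(dist, act)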
def get_network_fn(self, oc_kwargs):
    activation = nn.ReLU
    gate = F.relu
    obs_space = self.env.observation_space.shape
    hidden_units = oc_kwargs['hidden_sizes']
    act_dim = self.env.action_space.shape[0]
    self.continuous = True
    if len(obs_space) > 1:
        # image observations
        phi_body = VAE(load_path=oc_kwargs['vae_weights_path'], device=self.device) \
            if oc_kwargs['model_type'].lower() == 'vae' \
            else ConvBody(obs_space, oc_kwargs['conv_layer_sizes'],
                          activation, batchnorm=False)
        state_dim = phi_body.latent_dim
    else:
        state_dim = obs_space[0]
        phi_body = DummyBody(state_dim)
    network_fn = lambda: OptionGaussianActorCriticNet(
        state_dim,
        act_dim,
        num_options=oc_kwargs['num_options'],
        phi_body=phi_body,
        critic_body=FCBody(state_dim, hidden_units=hidden_units, gate=gate),
        option_body_fn=lambda: FCBody(state_dim, hidden_units=hidden_units,
                                      gate=gate),
        device=self.device)
    return network_fn
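
# --- Usage sketch (not part of the original file) ---
# `get_network_fn` is a method of the trainer class (it reads self.env and
# self.device), so it is called on a trainer instance; the oc_kwargs keys
# below are exactly those looked up above, with hypothetical values.
# oc_kwargs = dict(model_type='vae', vae_weights_path='vae_weights.pth',
#                  conv_layer_sizes=None, hidden_sizes=(64, 64), num_options=4)
# network_fn = trainer.get_network_fn(oc_kwargs)
# net = network_fn()  # builds a fresh OptionGaussianActorCriticNet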
class VAEActor(nn.Module):

    def __init__(self, vae_weights_path, obs_dim, act_dim, hidden_sizes,
                 activation, act_limit):
        '''
        A Variational Autoencoder for the Actor network
        Network Architecture: (input) -> VAE -> MLP -> (output)
        The VAE is pretrained on observation images.
        Assume observation space is in the shape: (3, 128, 128)
        Args:
            vae_weights_path (Str): Path to the vae weights file
            obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
            act_dim (int): action dimension of the environment
            hidden_sizes (list): list of number of neurons in each layer of MLP after output from VAE
            activation (nn.modules.activation): Activation function for each layer of MLP
            act_limit (float): the greatest magnitude possible for the action in the environment
        '''
        super().__init__()
        self.pi_vae = VAE()
        self.pi_vae.load_weights(vae_weights_path)
        mlp_sizes = [self.pi_vae.latent_dim] + list(hidden_sizes) + [act_dim]
        self.pi_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)
        self.act_limit = act_limit

    def forward(self, obs):
        '''
        Forward propagation for actor network
        Args:
            obs (Tensor [n, obs_dim]): batch of observations from the environment
        Return:
            output of actor network scaled by act_limit
        '''
        obs = self.pi_vae(obs)
        obs = self.pi_mlp(obs)
        return obs * self.act_limit

    def dataparallel(self, ngpu):
        print(f"Actor Network using {ngpu} gpus, gpu id: {list(range(ngpu))}")
        self.pi_vae.dataparallel(ngpu)
        self.pi_mlp = nn.DataParallel(self.pi_mlp, list(range(ngpu)))
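
# --- Usage sketch (not part of the original file) ---
# VAEActor is a deterministic policy (DDPG/TD3-style); arguments hypothetical.
# pi = VAEActor('vae_weights.pth', (3, 128, 128), act_dim=2,
#               hidden_sizes=[256, 256], activation=nn.ReLU, act_limit=1.0)
# obs_batch = torch.zeros(8, 3, 128, 128)
# act = pi(obs_batch)  # Tanh output scaled to [-act_limit, act_limit], shape [8, 2]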