def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation):
    '''
    A Convolutional Neural Net for the Gaussian Actor network (continuous outputs)

    Network Architecture: (input) -> CNN -> MLP -> (output)
    Besides the network, a learnable log-std vector with one entry per
    action dimension (initialised to -0.5) is registered as a parameter.

    Assume input is in the shape: (3, 128, 128)

    Args:
        obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
        act_dim (int): action dimension of the environment
        conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
            that describes the cnn architecture
        hidden_sizes (list): list of number of neurons in each layer of MLP after output from CNN
        activation (nn.modules.activation): Activation function for each layer of MLP
    '''
    super().__init__()

    # Learnable log standard deviation, starting at -0.5 for every action dimension
    initial_log_std = np.full(act_dim, -0.5, dtype=np.float32)
    self.log_std = torch.nn.Parameter(torch.as_tensor(initial_log_std))

    # Convolutional feature extractor; obs_dim[0] is the channel count C
    in_channels = obs_dim[0]
    self.mu_cnn = cnn(in_channels, conv_layer_sizes, activation, batchnorm=True)

    # calc_shape presumably yields the flattened CNN output size -- confirm against its definition
    self.start_dim = self.calc_shape(obs_dim, self.mu_cnn)

    layer_sizes = [self.start_dim, *hidden_sizes, act_dim]
    self.mu_mlp = mlp(layer_sizes, activation, output_activation=nn.Tanh)

    # Initialise the actor's final layer weights to be 1/100 of their default scale.
    # Index -2 is presumably the last linear layer, with the Tanh output
    # activation sitting at index -1 -- confirm against mlp().
    final_linear = self.mu_mlp[-2]
    final_linear.weight.data /= 100
def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation, act_limit):
    '''
    A Convolutional Neural Net for the Actor network

    Network Architecture: (input) -> CNN -> MLP -> (output)

    Assume input is in the shape: (3, 128, 128)

    Args:
        obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
        act_dim (int): action dimension of the environment
        conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
            that describes the cnn architecture
        hidden_sizes (list): list of number of neurons in each layer of MLP after output from CNN
        activation (nn.modules.activation): Activation function for each layer of MLP
        act_limit (float): the greatest magnitude possible for the action in the environment;
            stored on the instance as ``self.act_limit``
    '''
    super().__init__()
    self.act_limit = act_limit

    # Convolutional feature extractor; obs_dim[0] is the channel count C
    in_channels = obs_dim[0]
    self.pi_cnn = cnn(in_channels, conv_layer_sizes, activation, batchnorm=True)

    # calc_shape presumably yields the flattened CNN output size -- confirm against its definition
    self.start_dim = self.calc_shape(obs_dim, self.pi_cnn)

    # MLP head with a Tanh output activation
    layer_sizes = [self.start_dim, *hidden_sizes, act_dim]
    self.pi_mlp = mlp(layer_sizes, activation, output_activation=nn.Tanh)
def __init__(self, obs_dim, hidden_sizes, activation):
    '''
    A Multi-Layer Perceptron for the Critic network

    The network maps an observation of size obs_dim to a single output.

    Args:
        obs_dim (int): observation dimension of the environment
        hidden_sizes (list): list of number of neurons in each layer of MLP
        activation (nn.modules.activation): Activation function for each layer of MLP
    '''
    super().__init__()
    # Input layer, the hidden layers, then one output unit
    layer_sizes = [obs_dim, *hidden_sizes, 1]
    self.v_net = mlp(layer_sizes, activation)
def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
    '''
    A Multi-Layer Perceptron for the Actor network that outputs action logits

    The network maps an observation of size obs_dim to act_dim outputs
    (stored as ``self.logits_net``).

    Args:
        obs_dim (int): observation dimension of the environment
        act_dim (int): action dimension of the environment
        hidden_sizes (list): list of number of neurons in each layer of MLP
        activation (nn.modules.activation): Activation function for each layer of MLP
    '''
    super().__init__()
    layer_sizes = [obs_dim, *hidden_sizes, act_dim]
    self.logits_net = mlp(layer_sizes, activation)

    # Initialise the actor's final layer weights to be 1/100 of their default scale.
    # The last module is Identity (per the original author's note), so the
    # second-to-last module is the one whose weights are scaled.
    final_linear = self.logits_net[-2]
    final_linear.weight.data /= 100
def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
    '''
    A Multi-Layer Perceptron for the gaussian Actor network for continuous actions

    Besides the mean network, a learnable log-std vector with one entry per
    action dimension (initialised to -0.5) is registered as a parameter.

    Args:
        obs_dim (int): observation dimension of the environment
        act_dim (int): action dimension of the environment
        hidden_sizes (list): list of number of neurons in each layer of MLP
        activation (nn.modules.activation): Activation function for each layer of MLP
    '''
    super().__init__()

    # Learnable log standard deviation, starting at -0.5 for every action dimension
    initial_log_std = np.full(act_dim, -0.5, dtype=np.float32)
    self.log_std = torch.nn.Parameter(torch.as_tensor(initial_log_std))

    layer_sizes = [obs_dim, *hidden_sizes, act_dim]
    self.mu_net = mlp(layer_sizes, activation)

    # The last module is Identity (per the original author's note), so the
    # second-to-last module is the one whose weights are scaled down by 100.
    final_linear = self.mu_net[-2]
    final_linear.weight.data /= 100
def __init__(self, vae_weights_path, obs_dim, conv_layer_sizes, hidden_sizes, activation):
    '''
    A Variational Autoencoder Net for the Critic network

    Network Architecture: (input) -> VAE -> MLP -> (single output)

    Args:
        vae_weights_path (Str): Path to the vae weights file
        obs_dim (tuple): observation dimension of the environment in the form of (C, H, W);
            not read by this constructor
        conv_layer_sizes (list): not read by this constructor
        hidden_sizes (list): list of number of neurons in each layer of MLP
        activation (nn.modules.activation): Activation function for each layer of MLP
    '''
    super().__init__()

    # Build the VAE and load its weights from the given file
    vae = VAE()
    vae.load_weights(vae_weights_path)
    self.v_vae = vae

    # The MLP consumes the VAE latent vector and produces one output unit
    layer_sizes = [self.v_vae.latent_dim, *hidden_sizes, 1]
    self.v_mlp = mlp(layer_sizes, activation)
def __init__(self, obs_dim, act_dim, hidden_sizes, activation, act_limit):
    '''
    A Multi-Layer Perceptron for the Actor network

    The network maps an observation of size obs_dim to act_dim outputs
    and uses a Tanh output activation.

    Args:
        obs_dim (int): observation dimension of the environment
        act_dim (int): action dimension of the environment
        hidden_sizes (list): list of number of neurons in each layer of MLP
        activation (nn.modules.activation): Activation function for each layer of MLP
        act_limit (float): the greatest magnitude possible for the action in the environment;
            stored on the instance as ``self.act_limit``
    '''
    super().__init__()
    self.act_limit = act_limit
    layer_sizes = [obs_dim, *hidden_sizes, act_dim]
    self.pi = mlp(layer_sizes, activation, output_activation=nn.Tanh)
def __init__(self, obs_dim, conv_layer_sizes, hidden_sizes, activation):
    '''
    A Convolutional Neural Net for the Critic network

    Network Architecture: (input) -> CNN -> MLP -> (single output)

    Args:
        obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
        conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
            that describes the cnn architecture
        hidden_sizes (list): list of number of neurons in each layer of MLP
        activation (nn.modules.activation): Activation function for each layer of MLP
    '''
    super().__init__()

    # Convolutional feature extractor; obs_dim[0] is the channel count C
    in_channels = obs_dim[0]
    self.v_cnn = cnn(in_channels, conv_layer_sizes, activation, batchnorm=True)

    # calc_shape presumably yields the flattened CNN output size -- confirm against its definition
    self.start_dim = self.calc_shape(obs_dim, self.v_cnn)

    layer_sizes = [self.start_dim, *hidden_sizes, 1]
    self.v_mlp = mlp(layer_sizes, activation)
def __init__(self, vae_weights_path, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation):
    '''
    A Variational Autoencoder Net for the Gaussian Actor network (continuous outputs)

    Network Architecture: (input) -> VAE -> MLP -> (output)
    Besides the network, a learnable log-std vector with one entry per
    action dimension (initialised to -0.5) is registered as a parameter.

    Assume input is in the shape: (3, 128, 128)

    Args:
        vae_weights_path (Str): Path to the vae weights file
        obs_dim (tuple): observation dimension of the environment in the form of (C, H, W);
            not read by this constructor
        act_dim (int): action dimension of the environment
        conv_layer_sizes (list): not read by this constructor; kept so the
            signature stays compatible with existing callers
        hidden_sizes (list): list of number of neurons in each layer of MLP after output from VAE
        activation (nn.modules.activation): Activation function for each layer of MLP
    '''
    super().__init__()

    # Learnable log standard deviation, starting at -0.5 for every action dimension
    log_std = -0.5 * np.ones(act_dim, dtype=np.float32)
    self.log_std = torch.nn.Parameter(torch.as_tensor(log_std))

    self.mu_vae = VAE()
    # Load the VAE weights, as the other VAE-based networks in this file do.
    # Previously vae_weights_path was accepted but never used, leaving the
    # VAE with whatever weights VAE() constructs by default.
    self.mu_vae.load_weights(vae_weights_path)

    mlp_sizes = [self.mu_vae.latent_dim] + list(hidden_sizes) + [act_dim]
    self.mu_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)

    # Initialise the actor's final layer weights to be 1/100 of their default scale.
    # Index -2 is presumably the last linear layer, with the Tanh output
    # activation sitting at index -1 -- confirm against mlp().
    self.mu_mlp[-2].weight.data /= 100
def __init__(self, vae_weights_path, obs_dim, act_dim, hidden_sizes, activation):
    '''
    A Variational Autoencoder Net for the Actor network for discrete outputs

    Network Architecture: (input) -> VAE -> MLP -> (output)

    Assume input is in the shape: (3, 128, 128)

    Args:
        vae_weights_path (Str): Path to the vae weights file
        obs_dim (tuple): observation dimension of the environment in the form of (C, H, W);
            not read by this constructor
        act_dim (int): action dimension of the environment
        hidden_sizes (list): list of number of neurons in each layer of MLP after output from VAE
        activation (nn.modules.activation): Activation function for each layer of MLP
    '''
    super().__init__()

    # Build the VAE and load its weights from the given file
    vae = VAE()
    vae.load_weights(vae_weights_path)
    self.logits_vae = vae

    # NOTE(review): a Tanh output activation is requested even though the
    # outputs are named logits -- confirm this bound is intended.
    layer_sizes = [self.logits_vae.latent_dim, *hidden_sizes, act_dim]
    self.logits_mlp = mlp(layer_sizes, activation, output_activation=nn.Tanh)

    # Initialise the actor's final layer weights to be 1/100 of their default scale.
    # Index -2 is presumably the last linear layer, with the Tanh output
    # activation sitting at index -1 -- confirm against mlp().
    final_linear = self.logits_mlp[-2]
    final_linear.weight.data /= 100
def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation):
    '''
    A Convolutional Neural Net for the Actor network for discrete outputs

    Network Architecture: (input) -> CNN -> MLP -> (output)

    Assume input is in the shape: (3, 128, 128)

    Args:
        obs_dim (tuple): observation dimension of the environment in the form of (C, H, W)
        act_dim (int): action dimension of the environment
        conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
            that describes the cnn architecture
        hidden_sizes (list): list of number of neurons in each layer of MLP after output from CNN
        activation (nn.modules.activation): Activation function for each layer of MLP
    '''
    super().__init__()

    # Convolutional feature extractor; obs_dim[0] is the channel count C
    in_channels = obs_dim[0]
    self.logits_cnn = cnn(in_channels, conv_layer_sizes, activation, batchnorm=True)

    # calc_shape presumably yields the flattened CNN output size -- confirm against its definition
    self.start_dim = self.calc_shape(obs_dim, self.logits_cnn)

    # NOTE(review): a Tanh output activation is requested even though the
    # outputs are named logits -- confirm this bound is intended.
    layer_sizes = [self.start_dim, *hidden_sizes, act_dim]
    self.logits_mlp = mlp(layer_sizes, activation, output_activation=nn.Tanh)

    # Initialise the actor's final layer weights to be 1/100 of their default scale.
    # Index -2 is presumably the last linear layer, with the Tanh output
    # activation sitting at index -1 -- confirm against mlp().
    final_linear = self.logits_mlp[-2]
    final_linear.weight.data /= 100
def __init__(self, vae_weights_path, obs_dim, act_dim, hidden_sizes, activation, act_limit):
    '''
    A Variational Autoencoder for the Actor network

    Network Architecture: (input) -> VAE -> MLP -> (output)

    The VAE is pretrained on observation images.
    Assume observation space is in the shape: (3, 128, 128)

    Args:
        vae_weights_path (Str): Path to the vae weights file
        obs_dim (tuple): observation dimension of the environment in the form of (C, H, W);
            not read by this constructor
        act_dim (int): action dimension of the environment
        hidden_sizes (list): list of number of neurons in each layer of MLP after output from VAE
        activation (nn.modules.activation): Activation function for each layer of MLP
        act_limit (float): the greatest magnitude possible for the action in the environment;
            stored on the instance as ``self.act_limit``
    '''
    super().__init__()
    self.act_limit = act_limit

    # Build the VAE and load its weights from the given file
    vae = VAE()
    vae.load_weights(vae_weights_path)
    self.pi_vae = vae

    # MLP head on top of the VAE latent vector, with a Tanh output activation
    layer_sizes = [self.pi_vae.latent_dim, *hidden_sizes, act_dim]
    self.pi_mlp = mlp(layer_sizes, activation, output_activation=nn.Tanh)