def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation):
    '''
    A Convolutional Neural Net for the Actor network for Continuous outputs

    Network Architecture: (input) -> CNN -> MLP -> (output)

    The MLP input width is not hard-coded: it is measured by running a
    dummy observation of shape obs_dim through the CNN (see calc_shape).

    Args:
        obs_dim (tuple): observation dimension of the environment in the
            form of (C, H, W), e.g. (3, 128, 128)
        act_dim (int): action dimension of the environment
        conv_layer_sizes (list): list of 3-tuples consisting of
            (output_channel, kernel_size, stride) that describes the cnn
            architecture
        hidden_sizes (list): list of number of neurons in each layer of MLP
            after output from CNN
        activation (nn.modules.activation): Activation function class handed
            to both the cnn and the mlp builders
    '''
    super().__init__()

    # Learnable log standard deviation, one entry per action dimension,
    # initialised to -0.5. It does not depend on the observation.
    log_std = -0.5 * np.ones(act_dim, dtype=np.float32)
    self.log_std = torch.nn.Parameter(torch.as_tensor(log_std))

    # Convolutional feature extractor; obs_dim[0] is the channel count.
    self.mu_cnn = cnn(obs_dim[0], conv_layer_sizes, activation, batchnorm=True)

    # Number of flattened CNN features, used as the MLP input width.
    self.start_dim = self.calc_shape(obs_dim, self.mu_cnn)
    mlp_sizes = [self.start_dim] + list(hidden_sizes) + [act_dim]
    self.mu_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)

    # Scale the final linear layer's initial weights down by a factor of 100
    # (the bias is left untouched).
    # NOTE(review): assumes mlp() appends the output activation (Tanh here)
    # as the last module, so index -2 is the final Linear layer -- confirm
    # against mlp().
    final_linear = self.mu_mlp[-2]
    with torch.no_grad():
        # In-place update under no_grad instead of mutating `.data`, which
        # bypasses autograd's bookkeeping.
        final_linear.weight /= 100
def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation, act_limit):
    '''
    Build the CNN-based Actor network.

    Network Architecture: (input) -> CNN -> MLP -> (output)

    The width of the first MLP layer is obtained from calc_shape, which
    pushes a dummy observation of shape obs_dim through the CNN.

    Args:
        obs_dim (tuple): observation dimension of the environment in the
            form of (C, H, W), e.g. (3, 128, 128)
        act_dim (int): action dimension of the environment
        conv_layer_sizes (list): list of 3-tuples consisting of
            (output_channel, kernel_size, stride) that describes the cnn
            architecture
        hidden_sizes (list): list of number of neurons in each layer of MLP
            after output from CNN
        activation (nn.modules.activation): Activation function class handed
            to both the cnn and the mlp builders
        act_limit (float): the greatest magnitude possible for the action in
            the environment; stored on the instance as self.act_limit
    '''
    super().__init__()

    # Convolutional trunk: the first entry of obs_dim is the channel count.
    in_channels = obs_dim[0]
    self.pi_cnn = cnn(in_channels, conv_layer_sizes, activation, batchnorm=True)

    # Flattened feature count coming out of the trunk.
    flat_features = self.calc_shape(obs_dim, self.pi_cnn)
    self.start_dim = flat_features

    # Fully connected head: features -> hidden layers -> act_dim, with Tanh
    # passed as the output activation.
    layer_widths = [flat_features, *hidden_sizes, act_dim]
    self.pi_mlp = mlp(layer_widths, activation, output_activation=nn.Tanh)

    self.act_limit = act_limit
def calc_shape(self, obs_dim, cnn):
    '''
    Function to determine the shape of the data after the conv layers
    to determine how many neurons for the MLP.

    A single random observation of shape (1, C, H, W) is passed through
    `cnn` with gradients disabled and the number of elements in the result
    is returned.

    The probe runs with `cnn` (and all its submodules) temporarily switched
    to eval mode. Otherwise batch-norm layers would fold the random dummy
    batch into their running statistics, and would raise outright when the
    conv output has a single value per channel. Each submodule's original
    training flag is restored afterwards, even if the forward pass fails.

    Args:
        obs_dim (tuple): observation dimension in the form of (C, H, W)
        cnn (callable): the convolutional network; normally an nn.Module,
            but any callable mapping a tensor to a tensor is accepted

    Returns:
        int: number of elements produced by `cnn` for one observation
    '''
    C, H, W = obs_dim
    dummy_input = torch.randn(1, C, H, W)

    # Remember every submodule's mode so it can be restored exactly.
    if isinstance(cnn, torch.nn.Module):
        submodules = list(cnn.modules())
    else:
        submodules = []
    saved_modes = [module.training for module in submodules]

    for module in submodules:
        module.training = False
    try:
        with torch.no_grad():
            cnn_out = cnn(dummy_input)
    finally:
        for module, was_training in zip(submodules, saved_modes):
            module.training = was_training

    # The batch size is 1, so the total element count equals the flattened
    # per-observation feature count. numel() also works when the output is
    # not contiguous, unlike view(-1).
    return cnn_out.numel()
def __init__(self, obs_dim, conv_layer_sizes, hidden_sizes, activation):
    '''
    Build the CNN-based Critic network.

    Network Architecture: (input) -> CNN -> MLP -> (single output)

    Args:
        obs_dim (tuple): observation dimension of the environment in the
            form of (C, H, W)
        conv_layer_sizes (list): list of 3-tuples consisting of
            (output_channel, kernel_size, stride) that describes the cnn
            architecture
        hidden_sizes (list): list of number of neurons in each layer of MLP
        activation (nn.modules.activation): Activation function class handed
            to both the cnn and the mlp builders
    '''
    super().__init__()

    # Convolutional trunk: the first entry of obs_dim is the channel count.
    in_channels = obs_dim[0]
    self.v_cnn = cnn(in_channels, conv_layer_sizes, activation, batchnorm=True)

    # Flattened feature count coming out of the trunk.
    self.start_dim = self.calc_shape(obs_dim, self.v_cnn)

    # Fully connected head ending in one output unit; no output activation
    # is passed, so mlp() applies whatever its default is.
    value_head_widths = [self.start_dim, *hidden_sizes, 1]
    self.v_mlp = mlp(value_head_widths, activation)
def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation):
    '''
    A Convolutional Neural Net for the Actor network for discrete outputs

    Network Architecture: (input) -> CNN -> MLP -> (output)

    The MLP input width is not hard-coded: it is measured by running a
    dummy observation of shape obs_dim through the CNN (see calc_shape).

    Args:
        obs_dim (tuple): observation dimension of the environment in the
            form of (C, H, W), e.g. (3, 128, 128)
        act_dim (int): action dimension of the environment
        conv_layer_sizes (list): list of 3-tuples consisting of
            (output_channel, kernel_size, stride) that describes the cnn
            architecture
        hidden_sizes (list): list of number of neurons in each layer of MLP
            after output from CNN
        activation (nn.modules.activation): Activation function class handed
            to both the cnn and the mlp builders
    '''
    super().__init__()

    # Convolutional feature extractor; obs_dim[0] is the channel count.
    self.logits_cnn = cnn(obs_dim[0], conv_layer_sizes, activation, batchnorm=True)

    # Number of flattened CNN features, used as the MLP input width.
    self.start_dim = self.calc_shape(obs_dim, self.logits_cnn)
    mlp_sizes = [self.start_dim] + list(hidden_sizes) + [act_dim]
    self.logits_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)

    # Scale the final linear layer's initial weights down by a factor of 100
    # (the bias is left untouched).
    # NOTE(review): assumes mlp() appends the output activation (Tanh here)
    # as the last module, so index -2 is the final Linear layer -- confirm
    # against mlp().
    final_linear = self.logits_mlp[-2]
    with torch.no_grad():
        # In-place update under no_grad instead of mutating `.data`, which
        # bypasses autograd's bookkeeping.
        final_linear.weight /= 100