class Network(nn.Module):
    """Dueling categorical (C51-style) Q-network with noisy linear streams."""

    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        atom_size: int,
        support: torch.Tensor,
    ):
        """Initialization.

        Args:
            in_dim: size of the observation vector.
            out_dim: number of actions.
            atom_size: number of distribution atoms per action.
            support: tensor of atom values used to compute expected Q.
        """
        super(Network, self).__init__()

        self.support = support
        self.out_dim = out_dim
        self.atom_size = atom_size

        # Shared trunk feeding both the value and advantage streams.
        self.feature_layer = nn.Sequential(nn.Linear(in_dim, 128), nn.ReLU())

        # Advantage stream (one distribution per action).
        self.advantage_hidden_layer = NoisyLinear(128, 128)
        self.advantage_layer = NoisyLinear(128, out_dim * atom_size)

        # Value stream (a single distribution shared by all actions).
        self.value_hidden_layer = NoisyLinear(128, 128)
        self.value_layer = NoisyLinear(128, atom_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward method implementation.

        Returns expected Q-values: probability-weighted sum over the atom
        support, one value per (sample, action).
        """
        return torch.sum(self.dist(x) * self.support, dim=2)

    def dist(self, x: torch.Tensor) -> torch.Tensor:
        """Get distribution for atoms.

        Returns a tensor of shape (batch, out_dim, atom_size) holding a
        probability distribution over atoms for every action.
        """
        feat = self.feature_layer(x)
        adv_hidden = F.relu(self.advantage_hidden_layer(feat))
        val_hidden = F.relu(self.value_hidden_layer(feat))

        adv = self.advantage_layer(adv_hidden).view(
            -1, self.out_dim, self.atom_size
        )
        val = self.value_layer(val_hidden).view(-1, 1, self.atom_size)

        # Dueling combination: subtracting the advantage mean keeps the
        # value/advantage decomposition identifiable.
        q_atoms = val + adv - adv.mean(dim=1, keepdim=True)

        probs = F.softmax(q_atoms, dim=-1)
        # for avoiding nans
        return probs.clamp(min=1e-3)

    def reset_noise(self):
        """Reset all noisy layers."""
        for noisy in (
            self.advantage_hidden_layer,
            self.advantage_layer,
            self.value_hidden_layer,
            self.value_layer,
        ):
            noisy.reset_noise()
class Agent(nn.Module):
    """Dueling distributional DQN over image observations.

    A conv trunk feeds two NoisyLinear streams (value and advantage) that
    are combined into per-action atom scores of shape
    (batch, num_actions, num_atoms).
    """

    def __init__(self, input_shape, num_atoms, num_actions=4):
        """Build the conv trunk and the two noisy streams.

        Args:
            input_shape: observation shape as (channels, height, width).
            num_atoms: number of distribution atoms per action.
            num_actions: size of the discrete action space.
        """
        super(Agent, self).__init__()
        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms

        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )

        # Fix: compute the flattened conv-output size once. The original
        # called features_size() twice, running the dummy forward pass
        # through the conv trunk twice during construction.
        feature_size = self.features_size()
        self.noisy_value1 = NoisyLinear(feature_size, 512)
        self.noisy_value2 = NoisyLinear(512, self.num_atoms)
        self.noisy_advantage1 = NoisyLinear(feature_size, 512)
        self.noisy_advantage2 = NoisyLinear(512, self.num_atoms * self.num_actions)

    def features_size(self):
        """Flattened size of the conv trunk output for one observation."""
        return self.features(torch.zeros(1, *self.input_shape)).view(1, -1).size(1)

    def forward(self, x):
        """Return per-action atom scores, shape (batch, num_actions, num_atoms)."""
        batch_size = x.size(0)
        x = self.features(x)
        x = x.view(batch_size, -1)

        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value)
        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage)

        value = value.view(batch_size, 1, self.num_atoms)
        advantage = advantage.view(batch_size, self.num_actions, self.num_atoms)

        # Dueling combination; the result already has shape
        # (batch, num_actions, num_atoms), so the original trailing
        # x.view(-1, num_actions, num_atoms) was a no-op and is removed.
        return value + advantage - advantage.mean(1, keepdim=True)

    def reset_noise(self):
        """Re-sample the noise of every NoisyLinear layer."""
        self.noisy_value1.reset_noise()
        self.noisy_value2.reset_noise()
        self.noisy_advantage1.reset_noise()
        self.noisy_advantage2.reset_noise()

    def act(self, state, epsilon):
        """Epsilon-greedy action selection.

        With probability epsilon a random action is returned; otherwise the
        action maximising the mean over atoms of the forward output.
        NOTE(review): the greedy branch averages atoms uniformly rather than
        weighting by a support vector — confirm that is intended upstream.
        """
        if np.random.rand() > epsilon:
            with torch.no_grad():
                state = torch.FloatTensor(state).unsqueeze(0)
                qvalues = self.forward(state).mean(2)
                action = qvalues.max(1)[1]
                action = action.data.cpu().numpy()[0]
        else:
            action = np.random.randint(self.num_actions)
        return action
class Dqn(Qnet):
    def __init__(self, states_size: np.ndarray, action_size: np.ndarray,
                 settings: dict) -> None:
        """
        Initializes the neural network.

        Args:
            states_size: Size of the input space.
            action_size: Size of the action space.
            settings: dictionary with settings
        """
        super(Dqn, self).__init__()
        self.batch_size = settings["batch_size"]
        self.noisy_net = settings['noisy_net']
        layers_size = settings["layers_sizes"][0]

        # Pick the layer factory once instead of duplicating the three-layer
        # construction in both branches.
        layer_cls = NoisyLinear if self.noisy_net else nn.Linear
        self.FC1 = layer_cls(int(states_size), layers_size)
        self.FC2 = layer_cls(layers_size, layers_size)
        self.FC3 = layer_cls(layers_size, int(action_size))
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward step of the neural network

        Args:
            x(torch.Tensor): observation or a batch of observations

        Returns:
            torch.Tensor: q-values for all observations and actions,
            size: batch_size x actions_size
        """
        hidden = functional.relu(self.FC1(x))
        hidden = functional.relu(self.FC2(hidden))
        return self.FC3(hidden)

    def reset(self) -> None:
        """
        Resets the weights of the neural network layers.

        Returns:
            None
        """
        # NOTE(review): xavier init accesses `.weight` directly; confirm
        # NoisyLinear exposes a `weight` attribute when noisy_net is set.
        for layer in (self.FC1, self.FC2, self.FC3):
            torch.nn.init.xavier_uniform_(layer.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Resets the noise of the noisy layers.
        """
        for layer in (self.FC1, self.FC2, self.FC3):
            layer.reset_noise()
class DuelDQN(Qnet):
    def __init__(self, states_size: np.ndarray, action_size: np.ndarray,
                 settings: dict) -> None:
        """
        Initializes the neural network.

        Args:
            states_size: Size of the input space.
            action_size: Size of the action space.
            settings: dictionary with settings, currently not used.
        """
        super(DuelDQN, self).__init__()
        self.batch_size = settings["batch_size"]
        layers_size = settings["layers_sizes"][0]
        self.noisy_net = settings['noisy_nets']

        # Pick the layer factory once instead of duplicating the four-layer
        # construction in both branches.
        layer_cls = NoisyLinear if self.noisy_net else nn.Linear
        self.FC1 = layer_cls(int(states_size), layers_size)
        self.FC2 = layer_cls(layers_size, layers_size)
        self.FC3v = layer_cls(layers_size, 1)
        self.FC3a = layer_cls(layers_size, int(action_size))
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward step of the duelling q-network

        Args:
            x(torch.Tensor): observation or a batch of observations

        Returns:
            torch.Tensor: q-values for all observations and actions
        """
        hidden = functional.relu(self.FC1(x))
        hidden = functional.relu(self.FC2(hidden))
        v = self.FC3v(hidden)
        a = self.FC3a(hidden)

        # Dueling combination; an unbatched (1-D) input has no action axis
        # at dim 1, so the advantage mean is taken over the whole tensor.
        if x.ndimension() == 1:
            return v + (a - torch.mean(a))
        return v + (a - torch.mean(a, 1, True))

    def reset(self) -> None:
        """
        Resets the weights of the neural network layers.

        Returns:
            None
        """
        # NOTE(review): xavier init accesses `.weight` directly; confirm
        # NoisyLinear exposes a `weight` attribute when noisy_nets is set.
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            torch.nn.init.xavier_uniform_(layer.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Resets the noise of the noisy layers.
        """
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            layer.reset_noise()
class DistributionalDuelDQN(nn.Module, DistributionalNetHelper):
    def __init__(self, states_size: int, action_size: int, settings: dict,
                 device: torch.device) -> None:
        """
        Initializes the DistributionalDuelDqn

        Args:
            states_size (int): Size of the input space.
            action_size (int): Size of the action space.
            settings (dict): dictionary with settings
            device (torch.device): "gpu" or "cpu"
        """
        super(DistributionalDuelDQN, self).__init__()
        DistributionalNetHelper.__init__(
            self, settings, neural_network_call=self.forward, device=device
        )
        self.batch_size = settings["batch_size"]
        self.number_atoms = settings["number_atoms"]
        layers_size = settings["layers_sizes"][0]
        self.noisy_net = settings['noisy_nets']

        # Pick the layer factory once instead of duplicating the four-layer
        # construction in both branches. The advantage head emits one atom
        # vector per action, hence action_size * number_atoms outputs.
        layer_cls = NoisyLinear if self.noisy_net else nn.Linear
        self.FC1 = layer_cls(int(states_size), layers_size)
        self.FC2 = layer_cls(layers_size, layers_size)
        self.FC3v = layer_cls(layers_size, self.number_atoms)
        self.FC3a = layer_cls(layers_size, int(action_size) * self.number_atoms)
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the distributional neural network

        Args:
            x(torch.Tensor): a batch of observations

        Returns:
            torch.Tensor: distributions for each sample and action,
            size: batch_size x action_size x number_atoms
        """
        # An unbatched (1-D) observation is treated as a batch of one.
        batch_size = 1 if x.ndimension() == 1 else x.size()[0]

        hidden = nn.functional.relu(self.FC1(x))
        hidden = nn.functional.relu(self.FC2(hidden))

        adv = self.FC3a(hidden).view([batch_size, -1, self.number_atoms])
        val = self.FC3v(hidden)

        # Dueling combination: centre advantages over the action axis.
        adv_centered = adv - adv.mean(1).unsqueeze(1)
        if batch_size > 1:
            # Insert the action axis so the value head broadcasts.
            val = val.unsqueeze(1)
        return val + adv_centered

    def reset(self) -> None:
        """
        Resets the weights of the neural network layers and the noise of the
        noisy layers.

        Returns:
            None
        """
        # NOTE(review): xavier init accesses `.weight` directly; confirm
        # NoisyLinear exposes a `weight` attribute when noisy_nets is set.
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            torch.nn.init.xavier_uniform_(layer.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Samples noise for the noisy layers.
        """
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            layer.reset_noise()