Example #1
0
class Network(nn.Module):
    def __init__(
        self, 
        in_dim: int, 
        out_dim: int, 
        atom_size: int, 
        support: torch.Tensor
    ):
        """Build a dueling categorical network with noisy linear layers.

        Args:
            in_dim: size of the observation vector.
            out_dim: number of actions.
            atom_size: number of atoms in the value distribution.
            support: tensor of atom locations used to collapse
                distributions into Q-values.
        """
        super(Network, self).__init__()

        self.support = support
        self.out_dim = out_dim
        self.atom_size = atom_size

        # shared trunk feeding both streams
        self.feature_layer = nn.Sequential(nn.Linear(in_dim, 128), nn.ReLU())

        # advantage stream: one distribution per action
        self.advantage_hidden_layer = NoisyLinear(128, 128)
        self.advantage_layer = NoisyLinear(128, out_dim * atom_size)

        # value stream: a single state distribution
        self.value_hidden_layer = NoisyLinear(128, 128)
        self.value_layer = NoisyLinear(128, atom_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return Q-values: expectation of each action's distribution over the support."""
        return torch.sum(self.dist(x) * self.support, dim=2)

    def dist(self, x: torch.Tensor) -> torch.Tensor:
        """Return per-action probability mass over the atoms, shape (batch, out_dim, atom_size)."""
        feature = self.feature_layer(x)

        adv = self.advantage_layer(
            F.relu(self.advantage_hidden_layer(feature))
        ).view(-1, self.out_dim, self.atom_size)
        val = self.value_layer(
            F.relu(self.value_hidden_layer(feature))
        ).view(-1, 1, self.atom_size)

        # dueling aggregation: Q = V + (A - mean_a A)
        q_atoms = val + adv - adv.mean(dim=1, keepdim=True)

        # clamp keeps every atom probability strictly positive (avoids NaNs
        # downstream, e.g. in log of the distribution)
        return F.softmax(q_atoms, dim=-1).clamp(min=1e-3)

    def reset_noise(self):
        """Re-sample noise in every noisy layer."""
        for layer in (
            self.advantage_hidden_layer,
            self.advantage_layer,
            self.value_hidden_layer,
            self.value_layer,
        ):
            layer.reset_noise()
Example #2
0
class Agent(nn.Module):
    """Dueling categorical DQN over image observations, using noisy layers."""

    def __init__(self, input_shape, num_atoms, num_actions=4):
        super(Agent, self).__init__()

        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        # DQN-style convolutional encoder
        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU())
        # compute the flattened feature size once instead of running a
        # dummy forward pass for each stream
        feat_size = self.features_size()
        self.noisy_value1 = NoisyLinear(feat_size, 512)
        self.noisy_value2 = NoisyLinear(512, self.num_atoms)
        self.noisy_advantage1 = NoisyLinear(feat_size, 512)
        self.noisy_advantage2 = NoisyLinear(512,
                                            self.num_atoms * self.num_actions)

    def features_size(self):
        """Size of the flattened conv output for a single observation."""
        return self.features(torch.zeros(1,
                                         *self.input_shape)).view(1,
                                                                  -1).size(1)

    def forward(self, x):
        """Return dueling-combined atom values, shape (batch, num_actions, num_atoms)."""
        batch_size = x.size(0)
        x = self.features(x)
        x = x.view(batch_size, -1)
        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value).view(batch_size, 1, self.num_atoms)
        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage).view(
            batch_size, self.num_actions, self.num_atoms)
        # dueling aggregation: Q = V + (A - mean_a A); result is already
        # (batch, num_actions, num_atoms), so no extra reshape is needed
        return value + advantage - advantage.mean(1, keepdim=True)

    def reset_noise(self):
        """Re-sample noise in every noisy layer."""
        self.noisy_value1.reset_noise()
        self.noisy_value2.reset_noise()
        self.noisy_advantage1.reset_noise()
        self.noisy_advantage2.reset_noise()

    def act(self, state, epsilon):
        """Epsilon-greedy action selection; greedy value is the mean over atoms.

        Fix: the forward pass now also runs under ``torch.no_grad()``.
        Previously only the tensor conversion was inside the context, so
        greedy action selection built an unnecessary autograd graph.
        """
        if np.random.rand() > epsilon:
            with torch.no_grad():
                state = torch.FloatTensor(state).unsqueeze(0)
                qvalues = self.forward(state).mean(2)
                action = qvalues.max(1)[1]
            action = action.cpu().numpy()[0]
        else:
            action = np.random.randint(self.num_actions)
        return action
Example #3
0
class Dqn(Qnet):
    def __init__(self, states_size: np.ndarray, action_size: np.ndarray, settings: dict) -> None:
        """
        Initializes the neural network.
        Args:
            states_size: Size of the input space.
            action_size: Size of the action space.
            settings: dictionary with settings ("batch_size", "noisy_net",
                "layers_sizes")
        """
        super(Dqn, self).__init__()
        self.batch_size = settings["batch_size"]
        self.noisy_net = settings['noisy_net']
        layers_size = settings["layers_sizes"][0]
        # pick the layer type once; both branches share the same topology
        layer_cls = NoisyLinear if self.noisy_net else nn.Linear
        self.FC1 = layer_cls(int(states_size), layers_size)
        self.FC2 = layer_cls(layers_size, layers_size)
        self.FC3 = layer_cls(layers_size, int(action_size))
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward step of the neural network
        Args:
            x(torch.Tensor): observation or a batch of observations

        Returns:
            torch.Tensor: q-values for all observations and actions, size: batch_size x actions_size
        """
        hidden = functional.relu(self.FC1(x))
        hidden = functional.relu(self.FC2(hidden))
        return self.FC3(hidden)

    def reset(self) -> None:
        """
        Resets the weights of the neural network layers.
        Returns:
            None
        """
        for layer in (self.FC1, self.FC2, self.FC3):
            torch.nn.init.xavier_uniform_(layer.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Resets the noise of the noisy layers.
        """
        for layer in (self.FC1, self.FC2, self.FC3):
            layer.reset_noise()
Example #4
0
class DuelDQN(Qnet):
    def __init__(self, states_size: np.ndarray, action_size: np.ndarray, settings: dict) -> None:
        """
        Initializes the duelling neural network.
        Args:
            states_size: Size of the input space.
            action_size: Size of the action space.
            settings: dictionary with settings ("batch_size", "noisy_nets",
                "layers_sizes")
        """
        super(DuelDQN, self).__init__()
        self.batch_size = settings["batch_size"]
        layers_size = settings["layers_sizes"][0]
        self.noisy_net = settings['noisy_nets']
        # pick the layer type once; both branches share the same topology
        layer_cls = NoisyLinear if self.noisy_net else nn.Linear
        self.FC1 = layer_cls(int(states_size), layers_size)
        self.FC2 = layer_cls(layers_size, layers_size)
        self.FC3v = layer_cls(layers_size, 1)                  # state-value head
        self.FC3a = layer_cls(layers_size, int(action_size))   # advantage head
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward step of the duelling q-network
        Args:
            x(torch.Tensor): observation or a batch of observations

        Returns:
            torch.Tensor: q-values for all observations and actions
        """
        hidden = functional.relu(self.FC1(x))
        hidden = functional.relu(self.FC2(hidden))
        state_value = self.FC3v(hidden)
        advantages = self.FC3a(hidden)
        # Q = V + (A - mean(A)); a 1-D input is a single observation, so the
        # mean runs over the whole tensor, otherwise per-sample over actions.
        if hidden.ndimension() == 1:
            centered = advantages - torch.mean(advantages)
        else:
            centered = advantages - torch.mean(advantages, 1, True)
        return state_value + centered

    def reset(self) -> None:
        """
        Resets the weights of the neural network layers.
        Returns:
            None
        """
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            torch.nn.init.xavier_uniform_(layer.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Resets the noise of the noisy layers.
        """
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            layer.reset_noise()
Example #5
0
class DistributionalDuelDQN(nn.Module, DistributionalNetHelper):
    def __init__(self, states_size: int, action_size: int, settings: dict,
                 device: torch.device) -> None:
        """
        Initializes the DistributionalDuelDQN.
        Args:
            states_size (int): Size of the input space.
            action_size (int): Size of the action space.
            settings (dict): dictionary with settings ("batch_size",
                "number_atoms", "layers_sizes", "noisy_nets")
            device (torch.device): "gpu" or "cpu"
        """
        super(DistributionalDuelDQN, self).__init__()
        # Second base class is initialized explicitly because it needs the
        # settings plus a callable back into this network's forward pass.
        DistributionalNetHelper.__init__(self,
                                         settings,
                                         neural_network_call=self.forward,
                                         device=device)
        self.batch_size = settings["batch_size"]
        self.number_atoms = settings["number_atoms"]
        layers_size = settings["layers_sizes"][0]
        self.noisy_net = settings['noisy_nets']
        if not self.noisy_net:
            self.FC1 = nn.Linear(int(states_size), layers_size)
            self.FC2 = nn.Linear(layers_size, layers_size)
            # value head: one distribution of number_atoms values
            self.FC3v = nn.Linear(layers_size, self.number_atoms)
            # advantage head: one distribution per action, flattened
            self.FC3a = nn.Linear(layers_size,
                                  int(action_size * self.number_atoms))
        else:
            self.FC1 = NoisyLinear(int(states_size), layers_size)
            self.FC2 = NoisyLinear(layers_size, layers_size)
            self.FC3v = NoisyLinear(layers_size, self.number_atoms)
            self.FC3a = NoisyLinear(layers_size,
                                    int(action_size) * self.number_atoms)
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the distributional neural network.
        Args:
            x(torch.Tensor): a batch of observations
        Returns:
            torch.Tensor: distributions for each sample and action, size: batch_size x action_size x number_atoms

        NOTE(review): no softmax is applied here, so the returned atom values
        are unnormalized — presumably normalization happens in the caller
        (e.g. DistributionalNetHelper); confirm before relying on these as
        probabilities.
        """
        # A 1-D input is a single observation rather than a batch.
        if x.ndimension() == 1:
            batch_size = 1
        else:
            batch_size = x.size()[0]
        x = nn.functional.relu(self.FC1(x))
        x = nn.functional.relu(self.FC2(x))
        a = self.FC3a(x)
        v = self.FC3v(x)

        # Unflatten the advantage head to (batch, action, atom).
        a = a.view([batch_size, -1, self.number_atoms])
        # Dueling aggregation: subtract the per-atom mean over actions.
        average = a.mean(1).unsqueeze(1)
        a_scaled = a - average
        # For batch_size == 1, v has shape (number_atoms,) and broadcasts
        # against a_scaled's (1, action, atom) as-is; for real batches it
        # needs an explicit action axis: (batch, 1, atom).
        if batch_size > 1:
            v = v.unsqueeze(1)
        return_vals = v + a_scaled
        return return_vals

    def reset(self) -> None:
        """
        Resets the weights of the neural network layers and the noise of the noisy layers.
        Returns:
            None
        """
        torch.nn.init.xavier_uniform_(self.FC1.weight.data)
        torch.nn.init.xavier_uniform_(self.FC2.weight.data)
        torch.nn.init.xavier_uniform_(self.FC3a.weight.data)
        torch.nn.init.xavier_uniform_(self.FC3v.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Samples noise for the noisy layers.
        """
        self.FC1.reset_noise()
        self.FC2.reset_noise()
        self.FC3a.reset_noise()
        self.FC3v.reset_noise()