Example #1
    def __init__(self, input_shape, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowCnnDQN, self).__init__()

        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax

        # Definition of the convolutional feature extractor
        # Conv2d(in_channels, out_channels, kernel_size, stride)
        # (kernel_size = size of the convolutional filter)
        # (stride = step used to shift the convolutional filter)
        # ReLU(x) = max(0, x)
        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU())

        # NoisyLinear(in_features, out_features, use_cuda)
        # (fully connected layers applied after the conv features)
        self.noisy_value1 = NoisyLinear(self.feature_size(),
                                        512,
                                        use_cuda=USE_CUDA)
        self.noisy_value2 = NoisyLinear(512, self.num_atoms, use_cuda=USE_CUDA)

        self.noisy_advantage1 = NoisyLinear(self.feature_size(),
                                            512,
                                            use_cuda=USE_CUDA)
        self.noisy_advantage2 = NoisyLinear(512,
                                            self.num_atoms * self.num_actions,
                                            use_cuda=USE_CUDA)
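The examples on this page all assume a NoisyLinear layer and a USE_CUDA flag that are defined elsewhere. Below is a minimal sketch of such a layer, assuming the factorised Gaussian noise of "Noisy Networks for Exploration" (Fortunato et al., 2017); the use_cuda keyword and std_init default are assumptions chosen only to match the calls in these examples:

import math
import torch
import torch.nn as nn
import torch.nn.functional as F

USE_CUDA = torch.cuda.is_available()  # common definition; an assumption here

class NoisyLinear(nn.Module):
    # A minimal sketch, not the original author's implementation
    def __init__(self, in_features, out_features, use_cuda=False, std_init=0.4):
        super(NoisyLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.std_init = std_init

        # Learnable means and standard deviations of weights and biases
        self.weight_mu = nn.Parameter(torch.empty(out_features, in_features))
        self.weight_sigma = nn.Parameter(torch.empty(out_features, in_features))
        self.bias_mu = nn.Parameter(torch.empty(out_features))
        self.bias_sigma = nn.Parameter(torch.empty(out_features))

        # Noise buffers, resampled by reset_noise()
        self.register_buffer('weight_epsilon',
                             torch.empty(out_features, in_features))
        self.register_buffer('bias_epsilon', torch.empty(out_features))

        self.reset_parameters()
        self.reset_noise()
        if use_cuda:
            self.cuda()

    def reset_parameters(self):
        mu_range = 1.0 / math.sqrt(self.in_features)
        self.weight_mu.data.uniform_(-mu_range, mu_range)
        self.weight_sigma.data.fill_(self.std_init / math.sqrt(self.in_features))
        self.bias_mu.data.uniform_(-mu_range, mu_range)
        self.bias_sigma.data.fill_(self.std_init / math.sqrt(self.out_features))

    @staticmethod
    def _scale_noise(size):
        x = torch.randn(size)
        return x.sign() * x.abs().sqrt()

    def reset_noise(self):
        # Factorised noise: outer product of two small noise vectors
        eps_in = self._scale_noise(self.in_features)
        eps_out = self._scale_noise(self.out_features)
        self.weight_epsilon.copy_(eps_out.ger(eps_in))
        self.bias_epsilon.copy_(eps_out)

    def forward(self, x):
        if self.training:
            weight = self.weight_mu + self.weight_sigma * self.weight_epsilon
            bias = self.bias_mu + self.bias_sigma * self.bias_epsilon
        else:
            weight = self.weight_mu
            bias = self.bias_mu
        return F.linear(x, weight, bias)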
Example #2
    def __init__(self, num_inputs, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowDQN, self).__init__()

        self.num_inputs = num_inputs
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax
        # Markovian task: 16 hidden nodes
        numNodes = 16
        # Non-Markovian task: 48 hidden nodes
        # numNodes = 48

        self.linear1 = nn.Linear(num_inputs, numNodes)
        self.linear2 = nn.Linear(numNodes, numNodes)

        self.noisy_value1 = NoisyLinear(numNodes, numNodes, use_cuda=USE_CUDA)
        self.noisy_value2 = NoisyLinear(numNodes,
                                        self.num_atoms,
                                        use_cuda=USE_CUDA)

        self.noisy_advantage1 = NoisyLinear(numNodes,
                                            numNodes,
                                            use_cuda=USE_CUDA)
        self.noisy_advantage2 = NoisyLinear(numNodes,
                                            self.num_atoms * self.num_actions,
                                            use_cuda=USE_CUDA)
Example #3
    def __init__(self, input_shape, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowCnnDQN, self).__init__()

        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax

        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU())

        self.noisy_value1 = NoisyLinear(self.feature_size(),
                                        512,
                                        use_cuda=USE_CUDA)
        self.noisy_value2 = NoisyLinear(512, self.num_atoms, use_cuda=USE_CUDA)

        self.noisy_advantage1 = NoisyLinear(self.feature_size(),
                                            512,
                                            use_cuda=USE_CUDA)
        self.noisy_advantage2 = NoisyLinear(512,
                                            self.num_atoms * self.num_actions,
                                            use_cuda=USE_CUDA)
Example #4
class RainbowDQN(nn.Module):
    def __init__(self, num_inputs, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowDQN, self).__init__()

        self.num_inputs = num_inputs
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax

        self.linear1 = nn.Linear(num_inputs, 32)
        self.linear2 = nn.Linear(32, 64)

        self.noisy_value1 = NoisyLinear(64, 64, use_cuda=USE_CUDA)
        self.noisy_value2 = NoisyLinear(64, self.num_atoms, use_cuda=USE_CUDA)

        self.noisy_advantage1 = NoisyLinear(64, 64, use_cuda=USE_CUDA)
        self.noisy_advantage2 = NoisyLinear(64,
                                            self.num_atoms * self.num_actions,
                                            use_cuda=USE_CUDA)

    def forward(self, x):
        batch_size = x.size(0)

        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))

        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value)

        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage)

        value = value.view(batch_size, 1, self.num_atoms)
        advantage = advantage.view(batch_size, self.num_actions,
                                   self.num_atoms)

        x = value + advantage - advantage.mean(1, keepdim=True)
        x = F.softmax(x.view(-1, self.num_atoms),
                      dim=1).view(-1, self.num_actions, self.num_atoms)

        return x

    def reset_noise(self):
        self.noisy_value1.reset_noise()
        self.noisy_value2.reset_noise()
        self.noisy_advantage1.reset_noise()
        self.noisy_advantage2.reset_noise()

    def act(self, state, epsilon=0.0):
        if random.random() > epsilon:
            state = Variable(torch.FloatTensor(state).unsqueeze(0))
            dist = self.forward(state).data.cpu()
            dist = dist * torch.linspace(self.Vmin, self.Vmax, self.num_atoms)
            action = dist.sum(2).max(1)[1].numpy()[0]
        else:
            action = random.randrange(self.num_actions)
        return action
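A hypothetical usage of the fully connected RainbowDQN above, e.g. on a CartPole-style task with a 4-dimensional state and 2 actions; the hyperparameters are illustrative assumptions, not values from the original source:

num_atoms, Vmin, Vmax = 51, -10, 10
model = RainbowDQN(num_inputs=4, num_actions=2,
                   num_atoms=num_atoms, Vmin=Vmin, Vmax=Vmax)

state = [0.02, -0.01, 0.03, 0.04]        # one made-up observation
action = model.act(state, epsilon=0.05)  # noisy layers usually make epsilon unnecessary
model.reset_noise()                      # resample noise, typically after each update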
Example #5
    def __init__(self, num_inputs, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowDQN, self).__init__()

        self.num_inputs   = num_inputs
        self.num_actions  = num_actions
        self.num_atoms    = num_atoms
        self.Vmin         = Vmin
        self.Vmax         = Vmax

        self.linear1 = nn.Linear(num_inputs, 32)
        self.linear2 = nn.Linear(32, 64)

        self.noisy_value1 = NoisyLinear(64, 64, use_cuda=False)
        self.noisy_value2 = NoisyLinear(64, self.num_atoms, use_cuda=False)

        self.noisy_advantage1 = NoisyLinear(64, 64, use_cuda=False)
        self.noisy_advantage2 = NoisyLinear(64, self.num_atoms * self.num_actions, use_cuda=False)
Example #6
    def __init__(self, input_shape, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowDQN, self).__init__()

        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax

        self.features = nn.Sequential(
            # ((84 - 8 - 2*0) / 4) + 1 = 20
            nn.Conv2d(input_shape[0], 32, kernel_size=8,
                      stride=4),  # batch_size x 32 x 20 x 20
            nn.ReLU(),

            # ((20 - 4 - 2*0) / 2) + 1 = 9
            nn.Conv2d(32, 64, kernel_size=4,
                      stride=2),  # batch_size x 64 x 9 x 9
            nn.ReLU(),

            # ((9 - 3 - 2*0) / 1) + 1 = 7
            nn.Conv2d(64, 64, kernel_size=3,
                      stride=1),  # batch_size x 64 x 7 x 7
            nn.ReLU())

        self.noisy_value1 = NoisyLinear(self.feature_size(),
                                        512,
                                        use_cuda=USE_CUDA)
        self.noisy_value2 = NoisyLinear(512, self.num_atoms, use_cuda=USE_CUDA)

        self.noisy_advantage1 = NoisyLinear(self.feature_size(),
                                            512,
                                            use_cuda=USE_CUDA)
        self.noisy_advantage2 = NoisyLinear(512,
                                            self.num_atoms * self.num_actions,
                                            use_cuda=USE_CUDA)
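The shape comments above follow the standard convolution output formula, out = floor((in - kernel + 2*padding) / stride) + 1. A tiny helper (hypothetical, for illustration only) to verify the three stages on an 84x84 input:

def conv_out(size, kernel, stride, padding=0):
    # Output spatial size of a square convolution
    return (size - kernel + 2 * padding) // stride + 1

print(conv_out(84, 8, 4))  # 20
print(conv_out(20, 4, 2))  # 9
print(conv_out(9, 3, 1))   # 7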
Example #7
class RainbowDQN(nn.Module):
    def __init__(self, input_shape, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowDQN, self).__init__()

        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax

        self.features = nn.Sequential(
            # ((84 - 8 - 2*0) / 4) + 1 = 20
            nn.Conv2d(input_shape[0], 32, kernel_size=8,
                      stride=4),  # batch_size x 32 x 20 x 20
            nn.ReLU(),

            # ((20 - 4 - 2*0) / 2) + 1 = 9
            nn.Conv2d(32, 64, kernel_size=4,
                      stride=2),  # batch_size x 64 x 9 x 9
            nn.ReLU(),

            # ((9 - 3 - 2*0) / 1) + 1 = 7
            nn.Conv2d(64, 64, kernel_size=3,
                      stride=1),  # batch_size x 64 x 7 x 7
            nn.ReLU())

        self.noisy_value1 = NoisyLinear(self.feature_size(),
                                        512,
                                        use_cuda=USE_CUDA)
        self.noisy_value2 = NoisyLinear(512, self.num_atoms, use_cuda=USE_CUDA)

        self.noisy_advantage1 = NoisyLinear(self.feature_size(),
                                            512,
                                            use_cuda=USE_CUDA)
        self.noisy_advantage2 = NoisyLinear(512,
                                            self.num_atoms * self.num_actions,
                                            use_cuda=USE_CUDA)

    def forward(self, x):
        batch_size = x.size(0)

        x = x / 255.
        x = self.features(x)
        x = x.view(batch_size, -1)

        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value)

        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage)

        value = value.view(batch_size, 1, self.num_atoms)
        advantage = advantage.view(batch_size, self.num_actions,
                                   self.num_atoms)

        x = value + advantage - advantage.mean(1, keepdim=True)
        x = F.softmax(x.view(-1, self.num_atoms),
                      dim=1).view(-1, self.num_actions, self.num_atoms)

        return x

    def reset_noise(self):
        self.noisy_value1.reset_noise()
        self.noisy_value2.reset_noise()
        self.noisy_advantage1.reset_noise()
        self.noisy_advantage2.reset_noise()

    def feature_size(self):
        return self.features(
            autograd.Variable(torch.zeros(1,
                                          *self.input_shape))).view(1,
                                                                    -1).size(1)

    def act(self, state):
        state = Variable(torch.FloatTensor(np.float32(state)).unsqueeze(0),
                         volatile=True)
        dist = self.forward(state).data.cpu()
        dist = dist * torch.linspace(self.Vmin, self.Vmax, self.num_atoms)
        action = dist.sum(2).max(1)[1].numpy()[0]
        return action
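A quick sanity check of feature_size() under the standard 84x84 Atari preprocessing assumed by the shape comments above (the action count and distribution settings below are illustrative): the conv stack maps 84x84 to 20x20, 9x9, then 7x7, so the flattened feature vector has 64 * 7 * 7 = 3136 entries.

model = RainbowDQN(input_shape=(4, 84, 84), num_actions=6,
                   num_atoms=51, Vmin=-10, Vmax=10)
print(model.feature_size())  # 3136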
class RainbowCnnDQN(nn.Module):
    def __init__(self, input_shape, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowCnnDQN, self).__init__()

        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax

        # Definition of the convolutional feature extractor
        # Conv2d(in_channels, out_channels, kernel_size, stride)
        # (kernel_size = size of the convolutional filter)
        # (stride = step used to shift the convolutional filter)
        # ReLU(x) = max(0, x)
        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU())

        # NoisyLinear(in_features, out_features, use_cuda)
        # (fully connected layers applied after the conv features)
        self.noisy_value1 = NoisyLinear(self.feature_size(),
                                        512,
                                        use_cuda=USE_CUDA)
        self.noisy_value2 = NoisyLinear(512, self.num_atoms, use_cuda=USE_CUDA)

        self.noisy_advantage1 = NoisyLinear(self.feature_size(),
                                            512,
                                            use_cuda=USE_CUDA)
        self.noisy_advantage2 = NoisyLinear(512,
                                            self.num_atoms * self.num_actions,
                                            use_cuda=USE_CUDA)

    # Passes a state through the network; returns a distributional
    # output of shape (batch_size, num_actions, num_atoms)
    def forward(self, x):

        batch_size = x.size(0)

        # Scale pixel values from [0, 255] to [0, 1]
        x = x / 255.
        # Pass x through the convolutional feature extractor
        x = self.features(x)
        # Flatten to (batch_size, feature_size)
        x = x.view(batch_size, -1)

        # Value stream: outputs num_atoms logits
        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value)

        # Advantage stream: outputs num_atoms * num_actions logits
        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage)

        #Reshape value & advantage
        value = value.view(batch_size, 1, self.num_atoms)
        advantage = advantage.view(batch_size, self.num_actions,
                                   self.num_atoms)

        # Dueling networks: combine the value and advantage streams
        # (mean taken over the actions, i.e. over dimension 1 of advantage)
        # Here x holds the per-atom logits of Q(s, a)
        x = value + advantage - advantage.mean(1, keepdim=True)

        # Distributional RL: softmax over the atoms gives, for each action,
        # a probability distribution over num_atoms return values
        x = F.softmax(x.view(-1, self.num_atoms),
                      dim=1).view(-1, self.num_actions, self.num_atoms)

        # Shape: (batch_size, num_actions, num_atoms)
        return x

    def reset_noise(self):
        self.noisy_value1.reset_noise()
        self.noisy_value2.reset_noise()
        self.noisy_advantage1.reset_noise()
        self.noisy_advantage2.reset_noise()

    # Number of features output by the conv stack (the input size of the noisy layers)
    def feature_size(self):
        return self.features(
            autograd.Variable(torch.zeros(1,
                                          *self.input_shape))).view(1,
                                                                    -1).size(1)

    # Returns the index of the best action to choose
    def act(self, state):
        # unsqueeze(0) adds a batch dimension of size 1 at index 0
        # volatile=True disables gradient tracking, since no backprop is needed
        # at inference time (old PyTorch API; torch.no_grad() in current versions)
        state = Variable(torch.FloatTensor(np.float32(state)).unsqueeze(0),
                         volatile=True)

        # .data extracts the underlying tensor; .cpu() moves it to the CPU
        dist = self.forward(state).data.cpu()

        # torch.linspace: 1D tensor of num_atoms equally spaced points in [Vmin, Vmax]
        # dist has shape (batch_size, num_actions, num_atoms)
        # * broadcasts, so each atom probability is weighted by its support value
        dist = dist * torch.linspace(self.Vmin, self.Vmax, self.num_atoms)

        # dist.sum(2) sums over the atoms => expected Q-values, shape (batch_size, num_actions)
        # .max(1) takes the maximum over the actions
        # [1] selects the indices of the maxima
        # .numpy() converts the tensor to a NumPy array
        # [0] extracts the action index for the single state in the batch
        action = dist.sum(2).max(1)[1].numpy()[0]
        return action
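To make the expectation in act() concrete, here is a small worked example with 3 atoms: the expected return of each action is the probability-weighted sum of the support values, Q(s, a) = sum_i p_i(a) * z_i. The numbers below are made up for illustration.

import torch
p = torch.tensor([[0.1, 0.2, 0.7],    # action 0
                  [0.6, 0.3, 0.1]])   # action 1
z = torch.linspace(-10, 10, 3)        # support: [-10, 0, 10]
q = (p * z).sum(1)                    # tensor([ 6., -5.])
best = q.max(0)[1].item()             # 0 -> action 0 is chosen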
class RainbowDQN(nn.Module):
    def __init__(self, num_inputs, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowDQN, self).__init__()

        self.num_inputs = num_inputs
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax
        # Markovian task: 16 hidden nodes
        numNodes = 16
        # Non-Markovian task: 48 hidden nodes
        # numNodes = 48

        self.linear1 = nn.Linear(num_inputs, numNodes)
        self.linear2 = nn.Linear(numNodes, numNodes)

        self.noisy_value1 = NoisyLinear(numNodes, numNodes, use_cuda=USE_CUDA)
        self.noisy_value2 = NoisyLinear(numNodes,
                                        self.num_atoms,
                                        use_cuda=USE_CUDA)

        self.noisy_advantage1 = NoisyLinear(numNodes,
                                            numNodes,
                                            use_cuda=USE_CUDA)
        self.noisy_advantage2 = NoisyLinear(numNodes,
                                            self.num_atoms * self.num_actions,
                                            use_cuda=USE_CUDA)

    def forward(self, x):
        batch_size = x.size(0)

        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))

        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value)

        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage)

        value = value.view(batch_size, 1, self.num_atoms)
        advantage = advantage.view(batch_size, self.num_actions,
                                   self.num_atoms)

        x = value + advantage - advantage.mean(1, keepdim=True)
        x = F.softmax(x.view(-1, self.num_atoms),
                      dim=1).view(-1, self.num_actions, self.num_atoms)

        return x

    def reset_noise(self):
        self.noisy_value1.reset_noise()
        self.noisy_value2.reset_noise()
        self.noisy_advantage1.reset_noise()
        self.noisy_advantage2.reset_noise()

    def act(self, state):
        state = Variable(torch.FloatTensor(state).unsqueeze(0), volatile=True)
        dist = self.forward(state).data.cpu()
        dist = dist * torch.linspace(self.Vmin, self.Vmax, self.num_atoms)
        action = dist.sum(2).max(1)[1].numpy()[0]
        return action
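Finally, a small numeric illustration of the dueling combination used in forward(), applied per atom: Q(s, a) = V(s) + A(s, a) - mean over actions of A(s, a'). The tensors are made-up values with batch_size=1, 2 actions and 2 atoms.

import torch
value = torch.tensor([[[2.0, 2.0]]])        # shape (1, 1, num_atoms)
advantage = torch.tensor([[[1.0, 0.0],
                           [3.0, 2.0]]])    # shape (1, num_actions, num_atoms)
q = value + advantage - advantage.mean(1, keepdim=True)
print(q)  # tensor([[[1., 1.], [3., 3.]]])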