def __init__(self, input_shape, n_actions):
        """Build the convolutional trunk and the value/advantage heads.

        Args:
            input_shape: Shape of a single observation. ``input_shape[0]`` is
                used as the channel count of the first convolution and the
                full shape is passed to ``self._get_conv_out``.
            n_actions: Number of actions. The advantage head emits
                ``n_actions * N_ATOMS`` values, the value head ``N_ATOMS``.
        """
        super(RainbowDQN, self).__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )

        # Presumably the flattened size of the conv output for this input
        # shape; the helper is defined outside this block -- TODO confirm.
        conv_out_size = self._get_conv_out(input_shape)

        # Value head: one output per atom.
        self.fc_val = nn.Sequential(
            dqn_model.NoisyLinear(conv_out_size, 512),
            nn.ReLU(),
            dqn_model.NoisyLinear(512, N_ATOMS)
        )

        # Advantage head: one output per (action, atom) pair.
        self.fc_adv = nn.Sequential(
            dqn_model.NoisyLinear(conv_out_size, 512),
            nn.ReLU(),
            dqn_model.NoisyLinear(512, n_actions * N_ATOMS)
        )

        # torch.arange excludes its end value, so the previous
        # arange(Vmin, Vmax, DELTA_Z) stopped one step short of Vmax.
        # linspace includes both ends and always yields exactly N_ATOMS
        # points, matching the N_ATOMS-wide output of fc_val.
        # NOTE(review): assumes DELTA_Z == (Vmax - Vmin) / (N_ATOMS - 1);
        # confirm against the module-level constants.
        self.register_buffer("supports", torch.linspace(Vmin, Vmax, N_ATOMS))
        self.softmax = nn.Softmax(dim=1)
예제 #2
0
    def __init__(self, obs_size, act_size):
        """Assemble the actor network as a three-layer stack.

        Args:
            obs_size: Input width of the first ``NoisyLinear`` layer.
            act_size: Output width of the last ``NoisyLinear`` layer; a
                ``Tanh`` is applied after it.
        """
        super(DDPGActor, self).__init__()

        # Layers are created in the order they are applied.
        layers = [
            dqn_model.NoisyLinear(obs_size, 400),
            nn.ReLU(),
            dqn_model.NoisyLinear(400, 300),
            nn.ReLU(),
            dqn_model.NoisyLinear(300, act_size),
            nn.Tanh(),
        ]
        self.net = nn.Sequential(*layers)
예제 #3
0
    def __init__(self, input_shape, n_actions):
        """Create the value and advantage heads plus the support buffer.

        Args:
            input_shape: Passed directly as the input width of the first
                ``NoisyLinear`` layer of each head.
            n_actions: Number of actions; the advantage head emits
                ``n_actions * N_ATOMS`` values, the value head ``N_ATOMS``.
        """
        super(RainbowDQN, self).__init__()

        # Value head: one output per atom.
        val_hidden = dqn_model.NoisyLinear(input_shape, NUMBER_NEURONS)
        val_act = nn.ReLU()
        val_out = dqn_model.NoisyLinear(NUMBER_NEURONS, N_ATOMS)
        self.fc_val = nn.Sequential(val_hidden, val_act, val_out)

        # Advantage head: one output per (action, atom) pair.
        adv_hidden = dqn_model.NoisyLinear(input_shape, NUMBER_NEURONS)
        adv_act = nn.ReLU()
        adv_out = dqn_model.NoisyLinear(NUMBER_NEURONS, n_actions * N_ATOMS)
        self.fc_adv = nn.Sequential(adv_hidden, adv_act, adv_out)

        # arange excludes its end value, hence the extra DELTA_Z on the
        # upper bound.
        support_values = torch.arange(Vmin, Vmax + DELTA_Z, DELTA_Z)
        self.register_buffer("supports", support_values)
        self.softmax = nn.Softmax(dim=1)
예제 #4
0
    def __init__(self, input_shape, n_actions):
        """Build the convolutional trunk and the fully connected head.

        Args:
            input_shape: Shape of a single observation. ``input_shape[0]`` is
                used as the channel count of the first convolution and the
                full shape is passed to ``self._get_conv_out``.
            n_actions: Output width of the final ``NoisyLinear`` layer.
        """
        super(NoisyDQN, self).__init__()

        in_channels = input_shape[0]
        conv_stack = [
            nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*conv_stack)

        # Presumably the flattened size of the conv output for this input
        # shape; the helper is defined outside this block -- TODO confirm.
        conv_out_size = self._get_conv_out(input_shape)

        hidden_layer = dqn_model.NoisyLinear(conv_out_size, 512)
        output_layer = dqn_model.NoisyLinear(512, n_actions)

        # Plain list holding the same layer objects that self.fc wraps, so
        # they can be reached directly without indexing into the Sequential.
        self.noisy_layers = [hidden_layer, output_layer]
        self.fc = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)