Beispiel #1
0
    def __init__(self, num_inputs, action_space, normalize=False, name=None):
        """Create the convolutional trunk and the critic/actor heads.

        Args:
            num_inputs: Number of channels of the input frames (used as
                ``in_channels`` of the first convolution).
            action_space: Object whose ``n`` attribute sets the width of the
                actor head.
            normalize: When true, attach a ``RunningMeanStd`` sized for
                84x84 frames as ``self.ob_rms``; otherwise ``ob_rms`` is
                ``None``.
            name: Optional model name, stored as ``self._name``.
        """
        super(ActorCritic, self).__init__()

        self._name = name

        # Convolution stack: 8x8 stride 4, then 4x4 stride 2, then 3x3 stride 1.
        self.conv1 = nn.Conv2d(num_inputs, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, kernel_size=3, stride=1)

        # The flattened feature count is measured for a hard-coded 84x84 frame.
        hidden_units = 512
        flat_features = self._get_conv_out((num_inputs, 84, 84))
        self.linear1 = nn.Linear(flat_features, hidden_units)

        # Two heads on top of the shared hidden layer.
        self.critic_linear = nn.Linear(hidden_units, 1)
        self.actor_linear = nn.Linear(hidden_units, action_space.n)

        self.apply(weights_init)

        # Layers that are followed by a ReLU get their weights scaled by the
        # ReLU gain after the common initialisation; the heads do not.
        gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(gain)

        if normalize:
            self.ob_rms = RunningMeanStd(shape=(84, 84))
        else:
            self.ob_rms = None
Beispiel #2
0
    def __init__(self, args, action_space):
        """Assemble the convolutional trunk and the fully connected head(s).

        Args:
            args: Configuration object. The attributes read here are
                ``categorical``, ``dueling``, ``atoms`` (only when
                ``categorical`` is true), ``noisy_linear``,
                ``history_length``, ``hidden_size`` and ``normalize``.
            action_space: Number of actions; multiplied by the atom count to
                size the output of ``fc_a``.
        """
        super(DQN, self).__init__()

        self.categorical = args.categorical
        self.dueling = args.dueling
        # Without the categorical option there is a single atom per action.
        if args.categorical:
            self.atoms = args.atoms
        else:
            self.atoms = 1
        self.action_space = action_space

        # Pick the linear layer implementation once and reuse it below.
        if args.noisy_linear:
            linear_cls = NoisyLinear
        else:
            linear_cls = nn.Linear

        conv_layers = [
            nn.Conv2d(args.history_length, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*conv_layers)

        # Flattened feature count, measured for a hard-coded 84x84 frame.
        flat_features = self._get_conv_out((args.history_length, 84, 84))
        hidden = args.hidden_size

        # TODO: Add std_init argument to noisy linear constructors
        fc_a_outputs = action_space * self.atoms
        self.fc_a = nn.Sequential(
            linear_cls(in_features=flat_features, out_features=hidden),
            nn.ReLU(),
            linear_cls(in_features=hidden, out_features=fc_a_outputs),
        )

        # The second head only exists in the dueling configuration.
        if args.dueling:
            self.fc_v = nn.Sequential(
                linear_cls(in_features=flat_features, out_features=hidden),
                nn.ReLU(),
                linear_cls(in_features=hidden, out_features=self.atoms),
            )

        self.apply(weights_init)

        if args.normalize:
            self.ob_rms = RunningMeanStd(shape=(84, 84))
        else:
            self.ob_rms = None
Beispiel #3
0
class ActorCritic(nn.Module):
    """Convolutional actor-critic network with a shared feature trunk.

    Three convolutions and one fully connected layer feed two linear heads:
    ``critic_linear`` (a single output) and ``actor_linear`` (one output per
    action, ``action_space.n``). The first linear layer and the optional
    running normaliser are sized for 84x84 input frames.
    """

    def __init__(self, num_inputs, action_space, normalize=False, name=None):
        """Build the layers and initialise their weights.

        Args:
            num_inputs: Number of channels of the input frames.
            action_space: Object whose ``n`` attribute sets the width of the
                actor head.
            normalize: When true, keep a ``RunningMeanStd`` over 84x84 frames
                and normalise inputs with it in :meth:`forward`.
            name: Optional model name; :meth:`save` and :meth:`load` derive
                the checkpoint file name from it.
        """
        super(ActorCritic, self).__init__()

        self._name = name

        self.conv1 = nn.Conv2d(in_channels=num_inputs,
                               out_channels=32,
                               kernel_size=8,
                               stride=4)
        self.conv2 = nn.Conv2d(in_channels=32,
                               out_channels=64,
                               kernel_size=4,
                               stride=2)
        self.conv3 = nn.Conv2d(in_channels=64,
                               out_channels=32,
                               kernel_size=3,
                               stride=1)

        # Measure the flattened trunk output for an 84x84 frame.
        conv_out_size = self._get_conv_out((num_inputs, 84, 84))
        self.linear1 = nn.Linear(in_features=conv_out_size, out_features=512)

        self.critic_linear = nn.Linear(in_features=512, out_features=1)
        self.actor_linear = nn.Linear(in_features=512,
                                      out_features=action_space.n)

        self.apply(weights_init)

        # Layers followed by a ReLU get their weights scaled by the ReLU gain
        # after the common initialisation; the two heads are left as-is.
        relu_gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(relu_gain)

        self.ob_rms = RunningMeanStd(shape=(84, 84)) if normalize else None

    def _get_conv_out(self, shape):
        """Return the number of elements the conv stack yields for one input.

        Args:
            shape: Shape of a single input without the batch dimension.
        """
        # Only the output size is needed, so skip building an autograd graph.
        with torch.no_grad():
            o = self.conv1(torch.zeros(1, *shape))
            o = self.conv2(o)
            o = self.conv3(o)
        return int(np.prod(o.size()))

    def forward(self, x):
        """Run the network on a batch of frames.

        Args:
            x: Input batch; it is cast to the dtype of the convolution
                weights before entering the trunk.

        Returns:
            Tuple ``(critic_out, actor_out)``: the outputs of
            ``critic_linear`` and ``actor_linear`` for the batch.
        """
        # Normalisation is kept out of the autograd graph.
        with torch.no_grad():
            if self.ob_rms is not None:
                # The statistics are only updated in training mode.
                if self.training:
                    self.ob_rms.update(x)
                mean = self.ob_rms.mean.to(dtype=torch.float32,
                                           device=x.device)
                # The float32 epsilon keeps the divisor away from zero.
                std = torch.sqrt(
                    self.ob_rms.var.to(dtype=torch.float32, device=x.device) +
                    float(np.finfo(np.float32).eps))
                x = (x - mean) / std

        x = x.to(dtype=self.conv1.weight.dtype)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))

        # Flatten everything but the batch dimension.
        x = x.view(x.size(0), -1)
        x = F.relu(self.linear1(x))

        return self.critic_linear(x), self.actor_linear(x)

    def name(self):
        """Return the model name given at construction (may be ``None``)."""
        return self._name

    def save(self):
        """Write the state dict to ``'<name>.pth'``; do nothing if unnamed."""
        if self.name():
            name = '{}.pth'.format(self.name())
            torch.save(self.state_dict(), name)

    def load(self, name=None):
        """Load parameters from ``name`` or from this model's own checkpoint.

        Args:
            name: Path of the checkpoint file. When omitted (or falsy), the
                file written by :meth:`save` (``'<name>.pth'``) is used. If
                that file does not exist but a file called exactly like the
                model name does, that one is loaded instead, which is what
                this method loaded before it was aligned with :meth:`save`.

        Raises:
            ValueError: If no ``name`` is given and the model has no name.
        """
        # Local import: the file's import block is not visible from here.
        import os

        if not name:
            base = self.name()
            if not base:
                raise ValueError(
                    'load() needs a file name when the model has no name')
            name = '{}.pth'.format(base)
            if not os.path.exists(name) and os.path.exists(base):
                name = base
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        self.load_state_dict(torch.load(name))
Beispiel #4
0
class DQN(nn.Module):
    """Convolutional Q-network with optional dueling and categorical heads.

    A three-layer convolutional trunk feeds ``fc_a`` and, in the dueling
    configuration, ``fc_v``. The output always has shape
    ``(batch, action_space, atoms)``; ``atoms`` is 1 unless the categorical
    option is enabled. Layer sizes are derived for 84x84 input frames.
    """

    def __init__(self, args, action_space):
        """Build the network from a configuration object.

        Args:
            args: Configuration object. The attributes read here are
                ``categorical``, ``dueling``, ``atoms`` (only when
                ``categorical`` is true), ``noisy_linear``,
                ``history_length``, ``hidden_size`` and ``normalize``.
            action_space: Number of actions (an integer).
        """
        super(DQN, self).__init__()

        self.categorical = args.categorical
        self.dueling = args.dueling
        self.atoms = args.atoms if args.categorical else 1
        self.action_space = action_space

        # Linear layer implementation shared by both heads.
        Linear = NoisyLinear if args.noisy_linear else nn.Linear

        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=args.history_length,
                      out_channels=32,
                      kernel_size=8,
                      stride=4), nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4,
                      stride=2), nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3,
                      stride=1), nn.ReLU())

        # Measure the flattened trunk output for an 84x84 frame.
        conv_out_size = self._get_conv_out((args.history_length, 84, 84))

        # TODO: Add std_init argument to noisy linear constructors
        self.fc_a = nn.Sequential(
            Linear(in_features=conv_out_size, out_features=args.hidden_size),
            nn.ReLU(),
            Linear(in_features=args.hidden_size,
                   out_features=action_space * self.atoms),
        )

        # The second head only exists in the dueling configuration.
        if args.dueling:
            self.fc_v = nn.Sequential(
                Linear(in_features=conv_out_size,
                       out_features=args.hidden_size),
                nn.ReLU(),
                Linear(in_features=args.hidden_size, out_features=self.atoms),
            )

        self.apply(weights_init)

        self.ob_rms = RunningMeanStd(shape=(84,
                                            84)) if args.normalize else None

    def _get_conv_out(self, shape):
        """Return the number of elements the conv stack yields for one input.

        Args:
            shape: Shape of a single input without the batch dimension.
        """
        # Only the output size is needed, so skip building an autograd graph.
        with torch.no_grad():
            o = self.conv(torch.zeros(1, *shape))
        return int(np.prod(o.size()))

    def forward(self, x, log=False):
        """Compute per-action outputs for a batch of frames.

        Args:
            x: Input batch; it is cast to the dtype of the convolution
                weights before entering the trunk.
            log: Only used in the categorical configuration: return log
                probabilities instead of probabilities.

        Returns:
            Tensor of shape ``(batch, action_space, atoms)``. In the
            categorical configuration it is (log-)softmax-normalised over the
            atom dimension; otherwise it holds the raw head outputs.
        """
        # Normalisation is kept out of the autograd graph.
        with torch.no_grad():
            if self.ob_rms is not None:
                # The statistics are only updated in training mode.
                if self.training:
                    self.ob_rms.update(x)
                mean = self.ob_rms.mean.to(dtype=torch.float32,
                                           device=x.device)
                # The float32 epsilon keeps the divisor away from zero.
                std = torch.sqrt(
                    self.ob_rms.var.to(dtype=torch.float32, device=x.device) +
                    float(np.finfo(np.float32).eps))
                x = (x - mean) / std

        # Match the parameter dtype (as ActorCritic.forward does) so integer
        # frame tensors do not fail inside the first convolution.
        x = x.to(dtype=self.conv[0].weight.dtype)

        conv_out = self.conv(x).view(x.size(0), -1)
        a = self.fc_a(conv_out).view(-1, self.action_space, self.atoms)

        if self.dueling:
            v = self.fc_v(conv_out).view(-1, 1, self.atoms)
            # Combine streams: v is broadcast over the action dimension and
            # the mean of a over actions is subtracted.
            q = v + a - a.mean(1, keepdim=True)
        else:
            q = a

        if self.categorical:
            # Normalise over the atom dimension (dim=2); actions are indexed
            # along dim=1.
            if log:
                # Log softmax for numerical stability.
                q = F.log_softmax(q, dim=2)
            else:
                q = F.softmax(q, dim=2)

        return q

    def reset_noise(self):
        """Call ``reset_noise`` on every ``NoisyLinear`` layer of the heads."""
        heads = [self.fc_a]
        if self.dueling:
            heads.append(self.fc_v)

        for head in heads:
            for m in head.modules():
                if isinstance(m, NoisyLinear):
                    m.reset_noise()