Beispiel #1
0
    def __init__(self, num_inputs, num_outputs):
        """Create the single linear layer held by this module.

        Args:
            num_inputs: Input feature count of the linear layer.
            num_outputs: Output feature count of the linear layer.
        """
        super(Bernoulli, self).__init__()

        def zero_bias(tensor):
            # Fill the given tensor with zeros in place.
            return nn.init.constant_(tensor, 0)

        def orthogonal_init(module):
            # `init` is defined outside this block; presumably it applies the
            # weight and bias initialisers and returns the module -- confirm.
            return init(module, nn.init.orthogonal_, zero_bias)

        self.linear = orthogonal_init(nn.Linear(num_inputs, num_outputs))
Beispiel #2
0
    def __init__(self, num_inputs, num_outputs):
        """Create the mean projection and the log-std bias term.

        Args:
            num_inputs: Input feature count of the mean layer.
            num_outputs: Output feature count of the mean layer; also the
                length of the zero vector handed to ``AddBias``.
        """
        super(DiagGaussian, self).__init__()

        def zero_bias(tensor):
            # Fill the given tensor with zeros in place.
            return nn.init.constant_(tensor, 0)

        def orthogonal_init(module):
            # `init` is defined outside this block; presumably it applies the
            # weight and bias initialisers and returns the module -- confirm.
            return init(module, nn.init.orthogonal_, zero_bias)

        mean_layer = nn.Linear(num_inputs, num_outputs)
        self.fc_mean = orthogonal_init(mean_layer)

        # `AddBias` is defined elsewhere; it is seeded with an all-zero vector.
        initial_logstd = torch.zeros(num_outputs)
        self.logstd = AddBias(initial_logstd)
Beispiel #3
0
    def __init__(self, num_inputs, recurrent=False, hidden_size=512):
        """Build the convolutional feature stack and the scalar critic head.

        Args:
            num_inputs: Number of input channels of the first convolution.
            recurrent: Forwarded unchanged to the parent constructor.
            hidden_size: Width of the final feature layer; also forwarded to
                the parent constructor (twice) and used as the critic head's
                input size.
        """
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        def zero_bias(tensor):
            # Fill the given tensor with zeros in place.
            return nn.init.constant_(tensor, 0)

        relu_gain = nn.init.calculate_gain('relu')

        def relu_init(module):
            # Orthogonal weights scaled by the ReLU gain; `init` lives outside
            # this block (presumably returns the module -- confirm).
            return init(module, nn.init.orthogonal_, zero_bias, relu_gain)

        def plain_init(module):
            # Same as above but leaving the gain at `init`'s own default.
            return init(module, nn.init.orthogonal_, zero_bias)

        # Layers are created in the same order as they appear in the network
        # so random-number consumption during initialisation is unchanged.
        # NOTE(review): the 32 * 7 * 7 flatten size assumes the conv stack
        # emits 7x7 maps (e.g. 84x84 inputs) -- confirm against callers.
        feature_layers = [
            relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)),
            nn.ReLU(),
            relu_init(nn.Conv2d(32, 64, 4, stride=2)),
            nn.ReLU(),
            relu_init(nn.Conv2d(64, 32, 3, stride=1)),
            nn.ReLU(),
            Flatten(),
            relu_init(nn.Linear(32 * 7 * 7, hidden_size)),
            nn.ReLU(),
        ]
        self.main = nn.Sequential(*feature_layers)

        self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

        self.train()
Beispiel #4
0
    def __init__(self, num_inputs, num_outputs):
        """Create the single linear layer held by this module.

        Args:
            num_inputs: Input feature count of the linear layer.
            num_outputs: Output feature count of the linear layer.
        """
        super(Categorical, self).__init__()

        def zero_bias(tensor):
            # Fill the given tensor with zeros in place.
            return nn.init.constant_(tensor, 0)

        def small_gain_init(module):
            # A gain of 0.01 is passed to the `init` helper (defined outside
            # this block; presumably it scales the orthogonal weights).
            return init(module, nn.init.orthogonal_, zero_bias, gain=0.01)

        self.linear = small_gain_init(nn.Linear(num_inputs, num_outputs))
Beispiel #5
0
    def __init__(self, obs_shape, action_space):
        """Build ``self.main`` for the given observation/action space pair.

        Two combinations are supported:

        * ``Discrete`` action space with a 3-element ``obs_shape``: a
          convolutional stack ending in a linear layer with ``action_space.n``
          outputs.
        * ``Box`` action space with a 1-element ``obs_shape``: a three-layer
          MLP over ``obs_shape[0] + action_space.shape[0]`` inputs with a
          single output.

        Args:
            obs_shape: Observation shape; only its length and first entry are
                read here.
            action_space: Action space object; dispatched on its class name.

        Raises:
            NotImplementedError: For any other combination.
        """
        super(Discriminator, self).__init__()

        def zero_bias(tensor):
            # Fill the given tensor with zeros in place.
            return nn.init.constant_(tensor, 0)

        def relu_init(module):
            # Orthogonal weights with the ReLU gain; `init` is defined outside
            # this block (presumably returns the module -- confirm).
            return init(module, nn.init.orthogonal_, zero_bias,
                        nn.init.calculate_gain('relu'))

        def plain_init(module):
            # Orthogonal weights with `init`'s own default gain.
            return init(module, nn.init.orthogonal_, zero_bias)

        space_kind = action_space.__class__.__name__

        if space_kind == "Discrete" and len(obs_shape) == 3:
            # Image observations with discrete actions (Atari-style).
            action_dim = action_space.n
            state_dim = obs_shape[0]

            conv_layers = [
                relu_init(nn.Conv2d(state_dim, 32, 8, stride=4)),
                nn.ReLU(),
                relu_init(nn.Conv2d(32, 64, 4, stride=2)),
                nn.ReLU(),
                relu_init(nn.Conv2d(64, 32, 3, stride=1)),
                nn.ReLU(),
                Flatten(),
                relu_init(nn.Linear(32 * 7 * 7, 512)),
                nn.ReLU(),
                plain_init(nn.Linear(512, action_dim)),
            ]
            self.main = nn.Sequential(*conv_layers)
        elif space_kind == "Box" and len(obs_shape) == 1:
            # Flat observations with continuous actions (MuJoCo-style).
            action_dim = action_space.shape[0]
            state_dim = obs_shape[0]

            # Every layer, including the output layer, uses the ReLU gain.
            mlp_layers = [
                relu_init(nn.Linear(state_dim + action_dim, 64)),
                nn.ReLU(),
                relu_init(nn.Linear(64, 64)),
                nn.ReLU(),
                relu_init(nn.Linear(64, 1)),
            ]
            self.main = nn.Sequential(*mlp_layers)
        else:
            # Other spaces (e.g. "MultiBinary") are not handled.
            raise NotImplementedError

        self.train()
Beispiel #6
0
    def __init__(self, obs_shape, action_space):
        """Build the three-layer Tanh MLP stored in ``self.main``.

        The network takes ``obs_shape[0] + action_space.shape[0]`` inputs and
        produces a single output.

        Args:
            obs_shape: Observation shape; must have exactly one entry.
            action_space: Action space object whose class name must be
                ``"Box"``.

        Raises:
            NotImplementedError: If the action space is not a ``Box`` or
                ``obs_shape`` does not have length 1.
        """
        super(Critic, self).__init__()

        if (action_space.__class__.__name__ != "Box"
                or len(obs_shape) != 1):
            raise NotImplementedError

        action_dim = action_space.shape[0]
        state_dim = obs_shape[0]

        def zero_bias(tensor):
            # Fill the given tensor with zeros in place.
            return nn.init.constant_(tensor, 0)

        # NOTE: the gain is sqrt(2) even though the activations are Tanh; a
        # variant using nn.init.calculate_gain('tanh') was left disabled here.
        gain = np.sqrt(2)

        def orthogonal_init(module):
            # `init` is defined outside this block (presumably returns the
            # module after initialising it -- confirm).
            return init(module, nn.init.orthogonal_, zero_bias, gain)

        layers = [
            orthogonal_init(nn.Linear(state_dim + action_dim, 64)),
            nn.Tanh(),
            orthogonal_init(nn.Linear(64, 64)),
            nn.Tanh(),
            orthogonal_init(nn.Linear(64, 1)),
        ]
        self.main = nn.Sequential(*layers)

        self.train()
Beispiel #7
0
    def __init__(self, obs_shape, action_space):
        """Build the two-layer Tanh trunk and the ``DiagGaussian`` head.

        Args:
            obs_shape: Observation shape; must have exactly one entry, which
                is used as the trunk's input size.
            action_space: Action space object whose class name must be
                ``"Box"``; ``action_space.shape[0]`` sizes the distribution
                head.

        Raises:
            NotImplementedError: If the action space is not a ``Box`` or
                ``obs_shape`` does not have length 1.
        """
        super(Actor, self).__init__()

        if (action_space.__class__.__name__ != "Box"
                or len(obs_shape) != 1):
            raise NotImplementedError

        action_dim = action_space.shape[0]
        state_dim = obs_shape[0]

        def zero_bias(tensor):
            # Fill the given tensor with zeros in place.
            return nn.init.constant_(tensor, 0)

        # NOTE: the gain is sqrt(2) even though the activations are Tanh; a
        # variant using nn.init.calculate_gain('tanh') was left disabled here.
        gain = np.sqrt(2)

        def orthogonal_init(module):
            # `init` is defined outside this block (presumably returns the
            # module after initialising it -- confirm).
            return init(module, nn.init.orthogonal_, zero_bias, gain)

        trunk = [
            orthogonal_init(nn.Linear(state_dim, 64)),
            nn.Tanh(),
            orthogonal_init(nn.Linear(64, 64)),
            nn.Tanh(),
        ]
        self.base = nn.Sequential(*trunk)

        # The distribution head has one output per action dimension.
        self.dist = DiagGaussian(64, action_dim)
Beispiel #8
0
    def __init__(self, num_inputs, recurrent=False, hidden_size=64):
        """Build separate actor and critic Tanh MLPs plus a scalar critic head.

        Args:
            num_inputs: Input feature count. Passed to the parent constructor
                as given; when ``recurrent`` is true the MLPs use
                ``hidden_size`` as their input width instead.
            recurrent: Forwarded to the parent constructor; also switches the
                MLP input width as described above.
            hidden_size: Width of every hidden layer.
        """
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

        # With a recurrent parent the MLPs take `hidden_size` features in.
        mlp_inputs = hidden_size if recurrent else num_inputs

        def zero_bias(tensor):
            # Fill the given tensor with zeros in place.
            return nn.init.constant_(tensor, 0)

        gain = np.sqrt(2)

        def orthogonal_init(module):
            # `init` is defined outside this block (presumably returns the
            # module after initialising it -- confirm).
            return init(module, nn.init.orthogonal_, zero_bias, gain)

        # Construction order (actor, critic, critic head) is kept so that
        # random-number consumption during initialisation is unchanged.
        actor_layers = [
            orthogonal_init(nn.Linear(mlp_inputs, hidden_size)),
            nn.Tanh(),
            orthogonal_init(nn.Linear(hidden_size, hidden_size)),
            nn.Tanh(),
        ]
        self.actor = nn.Sequential(*actor_layers)

        critic_layers = [
            orthogonal_init(nn.Linear(mlp_inputs, hidden_size)),
            nn.Tanh(),
            orthogonal_init(nn.Linear(hidden_size, hidden_size)),
            nn.Tanh(),
        ]
        self.critic = nn.Sequential(*critic_layers)

        self.critic_linear = orthogonal_init(nn.Linear(hidden_size, 1))

        self.train()