Ejemplo n.º 1
0
    def __init__(self, num_inputs, use_gru):
        """Build the convolutional trunk, the optional GRU cell and the value head.

        Args:
            num_inputs: Number of input channels fed to the first convolution.
            use_gru: When truthy, a 512-unit ``nn.GRUCell`` is created and
                stored as ``self.gru``; otherwise no ``gru`` attribute is set
                by this constructor.
        """
        super(CNNBase, self).__init__()

        relu_gain = nn.init.calculate_gain('relu')

        def zero_bias(tensor):
            return nn.init.constant_(tensor, 0)

        # NOTE(review): `init` is defined outside this block; it is presumably
        # applying the weight/bias initialisers and returning the module --
        # confirm against its definition.
        def relu_init(module):
            return init(module, nn.init.orthogonal_, zero_bias, relu_gain)

        def plain_init(module):
            return init(module, nn.init.orthogonal_, zero_bias)

        # The list is built in forward order so the layers are constructed
        # (and therefore randomly initialised) in a fixed sequence.
        stages = [
            relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)),
            nn.ReLU(),
            relu_init(nn.Conv2d(32, 64, 4, stride=2)),
            nn.ReLU(),
            relu_init(nn.Conv2d(64, 32, 3, stride=1)),
            nn.ReLU(),
            Flatten(),
            # 32 * 7 * 7 is the flattened conv output for an 84x84 input
            # given the kernel sizes and strides above.
            relu_init(nn.Linear(32 * 7 * 7, 512)),
            nn.ReLU(),
        ]
        self.main = nn.Sequential(*stages)

        if use_gru:
            cell = nn.GRUCell(512, 512)
            self.gru = cell
            # Orthogonal weights first, then zeroed biases.
            for weight in (cell.weight_ih, cell.weight_hh):
                nn.init.orthogonal_(weight.data)
            for bias in (cell.bias_ih, cell.bias_hh):
                bias.data.fill_(0)

        # The value head uses the same initialisers but passes no gain.
        self.critic_linear = plain_init(nn.Linear(512, 1))

        self.train()
Ejemplo n.º 2
0
    def __init__(self, num_inputs, recurrent=False, hidden_size=64):
        """Build separate two-layer ReLU trunks for actor and critic plus a value head.

        Args:
            num_inputs: Width of the observation vector. It is forwarded to the
                parent constructor unchanged.
            recurrent: Forwarded to the parent constructor. When truthy, the
                trunks take ``hidden_size`` inputs instead of ``num_inputs``.
            hidden_size: Width of both hidden layers in each trunk.
        """
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

        # Presumably the parent's recurrent unit emits hidden_size features,
        # which is why the trunk input width switches -- confirm in the parent.
        if recurrent:
            num_inputs = hidden_size

        def zero_bias(tensor):
            return nn.init.constant_(tensor, 0)

        def init_(module):
            return init(module, nn.init.orthogonal_, zero_bias, np.sqrt(2))

        widths = [num_inputs, hidden_size, hidden_size]

        def build_trunk():
            # One Linear + ReLU pair per consecutive pair of widths.
            # NOTE(review): these Linear layers keep PyTorch's default
            # initialisation; only critic_linear goes through init_. Confirm
            # that this is intentional.
            modules = []
            for fan_in, fan_out in zip(widths[:-1], widths[1:]):
                modules.append(nn.Linear(fan_in, fan_out))
                modules.append(nn.ReLU())
            return nn.Sequential(*modules)

        # Actor is built before critic so layer construction order is fixed.
        self.actor = build_trunk()
        self.critic = build_trunk()

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
Ejemplo n.º 3
0
    def __init__(self, num_inputs, num_outputs):
        """Create the single linear layer stored as ``self.linear``.

        Args:
            num_inputs: Number of input features of the linear layer.
            num_outputs: Number of output features of the linear layer.
        """
        super(Bernoulli, self).__init__()

        def zero_bias(tensor):
            return nn.init.constant_(tensor, 0)

        # `init` is defined outside this block; it receives the layer, an
        # orthogonal weight initialiser and a zero bias initialiser.
        projection = nn.Linear(num_inputs, num_outputs)
        self.linear = init(projection, nn.init.orthogonal_, zero_bias)
Ejemplo n.º 4
0
    def __init__(self, num_inputs, num_outputs):
        """Create the mean layer and the log-std bias module.

        Args:
            num_inputs: Number of input features of the mean layer.
            num_outputs: Number of output features of the mean layer, and the
                length of the zero tensor handed to ``AddBias``.
        """
        super(DiagGaussian, self).__init__()

        def zero_bias(tensor):
            return nn.init.constant_(tensor, 0)

        # `init` is defined outside this block; it receives the layer, an
        # orthogonal weight initialiser and a zero bias initialiser.
        mean_layer = nn.Linear(num_inputs, num_outputs)
        self.fc_mean = init(mean_layer, nn.init.orthogonal_, zero_bias)

        # AddBias is defined elsewhere; it is seeded with an all-zero vector.
        initial_logstd = torch.zeros(num_outputs)
        self.logstd = AddBias(initial_logstd)
Ejemplo n.º 5
0
    def __init__(self, num_inputs, recurrent=False, hidden_size=512):
        """Build the convolutional trunk and the scalar value head.

        Args:
            num_inputs: Number of input channels fed to the first convolution.
            recurrent: Forwarded to the parent constructor.
            hidden_size: Output width of the trunk's final linear layer. It is
                passed to the parent constructor as both its second and third
                argument.
        """
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        relu_gain = nn.init.calculate_gain('relu')

        def zero_bias(tensor):
            return nn.init.constant_(tensor, 0)

        # NOTE(review): `init` is defined outside this block; it is presumably
        # applying the weight/bias initialisers and returning the module --
        # confirm against its definition.
        def relu_init(module):
            return init(module, nn.init.orthogonal_, zero_bias, relu_gain)

        # The list is built in forward order so the layers are constructed
        # (and therefore randomly initialised) in a fixed sequence.
        stages = [
            relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)),
            nn.ReLU(),
            relu_init(nn.Conv2d(32, 64, 4, stride=2)),
            nn.ReLU(),
            relu_init(nn.Conv2d(64, 32, 3, stride=1)),
            nn.ReLU(),
            Flatten(),
            # 32 * 7 * 7 is the flattened conv output for an 84x84 input
            # given the kernel sizes and strides above.
            relu_init(nn.Linear(32 * 7 * 7, hidden_size)),
            nn.ReLU(),
        ]
        self.main = nn.Sequential(*stages)

        # The value head uses the same initialisers but passes no gain.
        value_head = nn.Linear(hidden_size, 1)
        self.critic_linear = init(value_head, nn.init.orthogonal_, zero_bias)

        self.train()
    def __init__(self, num_inputs, num_outputs):
        """Create the single linear layer stored as ``self.linear``.

        Args:
            num_inputs: Number of input features of the linear layer.
            num_outputs: Number of output features of the linear layer.
        """
        super(Categorical, self).__init__()

        def zero_bias(tensor):
            return nn.init.constant_(tensor, 0)

        # `init` is defined outside this block. A gain of 0.01 is passed by
        # keyword together with the orthogonal weight initialiser.
        projection = nn.Linear(num_inputs, num_outputs)
        self.linear = init(projection, nn.init.orthogonal_, zero_bias,
                           gain=0.01)
Ejemplo n.º 7
0
    def __init__(self, num_inputs):
        """Build 64-unit Tanh trunks for actor and critic plus a value head.

        Args:
            num_inputs: Number of input features of the first layer in each
                trunk.
        """
        super(MLPBase, self).__init__()

        def zero_bias(tensor):
            return nn.init.constant_(tensor, 0)

        # `init` and `init_normc_` are defined outside this block; every
        # Linear layer created here is passed through them.
        def normc_linear(in_features, out_features):
            return init(nn.Linear(in_features, out_features), init_normc_,
                        zero_bias)

        def build_trunk():
            stages = [
                normc_linear(num_inputs, 64),
                nn.Tanh(),
                normc_linear(64, 64),
                nn.Tanh(),
            ]
            return nn.Sequential(*stages)

        # Actor is built before critic so layer construction order is fixed.
        self.actor = build_trunk()
        self.critic = build_trunk()

        self.critic_linear = normc_linear(64, 1)

        self.train()
Ejemplo n.º 8
0
    def __init__(self, perception, action_space, internal_state_size=128):
        """Wire a perception unit to a GRU cell, a value head and an action head.

        Args:
            perception: Module stored as ``self.perception_unit``. It must
                expose an ``output_size`` attribute. The previous docstring
                described it as an ActorCriticModule -- confirm.
            action_space: Object whose ``n`` attribute gives the number of
                outputs of the categorical action head.
            internal_state_size: Input and hidden width of the GRU cell, and
                input width of the action head.
        """
        super().__init__()
        self.perception_unit = perception

        state_width = internal_state_size
        self.gru = nn.GRUCell(input_size=state_width, hidden_size=state_width)

        def zero_bias(tensor):
            return nn.init.constant_(tensor, 0)

        # Critic: one scalar output per perception feature vector.
        # NOTE(review): the critic is sized from perception_unit.output_size
        # while the action head is sized from internal_state_size -- verify
        # that this matches how the forward pass feeds them.
        value_head = nn.Linear(self.perception_unit.output_size, 1)
        self.critic_linear = init(value_head, nn.init.orthogonal_, zero_bias)

        # Actor: categorical distribution over action_space.n choices.
        action_count = action_space.n
        self.dist = Categorical(state_width, action_count)

        # Loss modules kept on the instance.
        self.l2 = nn.MSELoss()
        self.l1 = nn.L1Loss()
Ejemplo n.º 9
0
    def __init__(self, perception, action_space, num_stack=4):
        """Wire a perception unit to a value head and a categorical action head.

        Args:
            perception: Module stored as ``self.perception_unit``. It must
                expose an ``output_size`` attribute. The previous docstring
                described it as an ActorCriticModule -- confirm.
            action_space: Action-space object. Only instances whose class is
                named ``"Discrete"`` are accepted; its ``n`` attribute gives
                the number of actions.
            num_stack: Accepted but not used anywhere in this constructor.

        Raises:
            NotImplementedError: If the action space's class name is not
                ``"Discrete"``.
        """
        super().__init__()
        self.perception_unit = perception
        feature_width = self.perception_unit.output_size

        def zero_bias(tensor):
            return nn.init.constant_(tensor, 0)

        # Critic: the value head is created before the action space is checked.
        value_head = nn.Linear(feature_width, 1)
        self.critic_linear = init(value_head, nn.init.orthogonal_, zero_bias)

        # Only discrete action spaces are supported.
        if action_space.__class__.__name__ != "Discrete":
            raise NotImplementedError

        action_count = action_space.n
        self.dist = Categorical(self.perception_unit.output_size,
                                action_count)

        # Loss modules kept on the instance.
        self.l2 = nn.MSELoss()
        self.l1 = nn.L1Loss()
Ejemplo n.º 10
0
 def init_(m):
     """Pass ``m`` to ``init`` with orthogonal weights, zero bias and gain sqrt(2).

     ``init`` is defined outside this block; whatever it returns is returned
     unchanged.
     """
     def zero_bias(x):
         return nn.init.constant_(x, 0)

     weight_gain = np.sqrt(2)
     return init(m, nn.init.orthogonal_, zero_bias, weight_gain)