Ejemplo n.º 1
0
    def __init__(self,
                 num_inputs,
                 recurrent=False,
                 num_streets=4,
                 hidden_size=256,
                 total_hidden_size=512):
        """Build the image encoder and the value head.

        Args:
            num_inputs: Accepted but never read in this constructor; the
                first convolution is fixed at 8 input channels.
            recurrent: Must be falsy; a truthy value raises.
            num_streets: Stored unchanged on ``self.num_streets``.
            hidden_size: Passed as both the second and third positional
                argument of the parent constructor.
            total_hidden_size: Output width of ``img_embed`` and input width
                of ``critic_linear``.

        Raises:
            NotImplementedError: If ``recurrent`` is truthy.
        """
        if recurrent:
            raise NotImplementedError("recurrent policy not done yet")
        super(NaviBaseTemp, self).__init__(recurrent, hidden_size, hidden_size)
        self.num_streets = num_streets

        # `init` is defined outside this block; presumably it applies the
        # first callable to the weights and the second to the bias -- confirm.
        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        def relu_init(module):
            return init(module, nn.init.orthogonal_, zero_fill,
                        nn.init.calculate_gain('relu'))

        def plain_init(module):
            # No gain passed: `init` falls back to its own default.
            return init(module, nn.init.orthogonal_, zero_fill)

        # NOTE(review): the 32 * 8 * 8 flatten size assumes the conv stack
        # ends in 32 maps of 8x8 -- confirm against the input resolution.
        # Layers are created in their final order on purpose.
        encoder_layers = [
            relu_init(nn.Conv2d(8, 96, 3, stride=2)),
            nn.ReLU(),
            relu_init(nn.Conv2d(96, 96, 5, stride=2)),
            nn.ReLU(),
            relu_init(nn.Conv2d(96, 32, 5, stride=2)),
            nn.ReLU(),
            Flatten(),
            relu_init(nn.Linear(32 * 8 * 8, total_hidden_size)),
            nn.ReLU(),
        ]
        self.img_embed = nn.Sequential(*encoder_layers)

        value_head = nn.Linear(total_hidden_size, 1)
        self.critic_linear = plain_init(value_head)
        self.train()
Ejemplo n.º 2
0
    def __init__(self, num_inputs, num_outputs):
        """Create the single linear layer held by this module.

        Args:
            num_inputs: Input feature count of ``self.linear``.
            num_outputs: Output feature count of ``self.linear``.
        """
        super(Bernoulli, self).__init__()

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        projection = nn.Linear(num_inputs, num_outputs)
        # `init` lives outside this block; it receives the layer, the
        # orthogonal initializer and the zero-fill callable, and its return
        # value is what gets stored.
        self.linear = init(projection, nn.init.orthogonal_, zero_fill)
Ejemplo n.º 3
0
    def __init__(self, num_inputs, num_outputs):
        """Create the mean projection and the ``logstd`` bias module.

        Args:
            num_inputs: Input feature count of ``self.fc_mean``.
            num_outputs: Output feature count of ``self.fc_mean`` and length
                of the zero vector handed to ``AddBias``.
        """
        super(DiagGaussian, self).__init__()

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        mean_layer = nn.Linear(num_inputs, num_outputs)
        # `init` lives outside this block; its return value is stored as the
        # layer, so it is expected to hand the module back.
        self.fc_mean = init(mean_layer, nn.init.orthogonal_, zero_fill)

        initial_logstd = torch.zeros(num_outputs)
        self.logstd = AddBias(initial_logstd)
Ejemplo n.º 4
0
    def __init__(self,
                 num_inputs,
                 recurrent=False,
                 num_streets=4,
                 hidden_size=256,
                 total_hidden_size=(256 * 10)):
        """Build the per-modality embedders, the fusion MLP and the value head.

        Args:
            num_inputs: Accepted but never read in this constructor; the
                first convolution is fixed at 3 input channels.
            recurrent: Must be falsy; a truthy value raises.
            num_streets: Stored on ``self.num_streets`` and used as the input
                width of ``street_embed``.
            hidden_size: Passed twice to the parent constructor; also the
                output width of ``img_embed``, ``coord_embed`` and
                ``street_embed``.
            total_hidden_size: Input and output width of both ``fusion``
                layers, and input width of ``critic_linear``.

        Raises:
            NotImplementedError: If ``recurrent`` is truthy.
        """
        if recurrent:
            raise NotImplementedError("recurrent policy not done yet")
        super(NaviBase, self).__init__(recurrent, hidden_size, hidden_size)
        self.num_streets = num_streets

        # `init` is defined outside this block; presumably it applies the
        # first callable to the weights and the second to the bias -- confirm.
        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        def conv_init(module):
            return init(module, nn.init.orthogonal_, zero_fill,
                        nn.init.calculate_gain('relu'))

        def dense_init(module):
            return init(module, nn.init.orthogonal_, zero_fill, np.sqrt(2))

        def head_init(module):
            # No gain passed: `init` falls back to its own default.
            return init(module, nn.init.orthogonal_, zero_fill)

        # Sub-modules are created in exactly this order so that parameter
        # registration and random-number consumption stay unchanged.
        # NOTE(review): the 32 * 8 * 8 flatten size assumes the conv stack
        # ends in 32 maps of 8x8 -- confirm against the input resolution.
        image_layers = [
            conv_init(nn.Conv2d(3, 32, 3, stride=2)),
            nn.ReLU(),
            conv_init(nn.Conv2d(32, 64, 5, stride=2)),
            nn.ReLU(),
            conv_init(nn.Conv2d(64, 32, 5, stride=2)),
            nn.ReLU(),
            Flatten(),
            conv_init(nn.Linear(32 * 8 * 8, hidden_size)),
            nn.ReLU(),
        ]
        self.img_embed = nn.Sequential(*image_layers)

        # TODO: check whether the embedders below should use different
        # activation functions.

        coord_layers = [
            dense_init(nn.Linear(2, 64)),
            nn.Tanh(),
            dense_init(nn.Linear(64, hidden_size)),
            nn.Tanh(),
        ]
        self.coord_embed = nn.Sequential(*coord_layers)

        number_layer = dense_init(nn.Linear(10, 64))
        self.number_embed = nn.Sequential(number_layer, nn.Tanh())

        street_layer = dense_init(nn.Linear(self.num_streets, hidden_size))
        self.street_embed = nn.Sequential(street_layer, nn.Tanh())

        fusion_layers = [
            dense_init(nn.Linear(total_hidden_size, total_hidden_size)),
            nn.Tanh(),
            dense_init(nn.Linear(total_hidden_size, total_hidden_size)),
            nn.Tanh(),
        ]
        self.fusion = nn.Sequential(*fusion_layers)

        self.critic_linear = head_init(nn.Linear(total_hidden_size, 1))

        self.train()
Ejemplo n.º 5
0
    def __init__(self, num_inputs, num_outputs):
        """Create the single linear layer held by this module.

        Args:
            num_inputs: Input feature count of ``self.linear``.
            num_outputs: Output feature count of ``self.linear``.
        """
        super(Categorical, self).__init__()

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        projection = nn.Linear(num_inputs, num_outputs)
        # `init` lives outside this block; the gain is passed by keyword
        # and set to 0.01.
        self.linear = init(projection,
                           nn.init.orthogonal_,
                           zero_fill,
                           gain=0.01)
Ejemplo n.º 6
0
    def __init__(self, num_inputs, recurrent=False, hidden_size=512):
        """Build the convolutional trunk and the value head.

        Args:
            num_inputs: Input channel count of the first convolution.
            recurrent: Forwarded unchanged to the parent constructor.
            hidden_size: Passed twice to the parent constructor; also the
                output width of ``main`` and input width of
                ``critic_linear``.
        """
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        # `init` is defined outside this block; presumably it applies the
        # first callable to the weights and the second to the bias -- confirm.
        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        def trunk_init(module):
            return init(module, nn.init.orthogonal_, zero_fill,
                        nn.init.calculate_gain('relu'))

        def head_init(module):
            # No gain passed: `init` falls back to its own default.
            return init(module, nn.init.orthogonal_, zero_fill)

        # NOTE(review): the 32 * 7 * 7 flatten size assumes the conv stack
        # ends in 32 maps of 7x7 -- confirm against the input resolution.
        trunk_layers = [
            trunk_init(nn.Conv2d(num_inputs, 32, 8, stride=4)),
            nn.ReLU(),
            trunk_init(nn.Conv2d(32, 64, 4, stride=2)),
            nn.ReLU(),
            trunk_init(nn.Conv2d(64, 32, 3, stride=1)),
            nn.ReLU(),
            Flatten(),
            trunk_init(nn.Linear(32 * 7 * 7, hidden_size)),
            nn.ReLU(),
        ]
        self.main = nn.Sequential(*trunk_layers)

        self.critic_linear = head_init(nn.Linear(hidden_size, 1))

        self.train()
Ejemplo n.º 7
0
    def __init__(self, num_inputs, recurrent=False, hidden_size=64):
        """Build the actor and critic MLPs and the scalar value head.

        Args:
            num_inputs: Passed to the parent constructor as its second
                argument; also the input width of the first actor and critic
                layers unless ``recurrent`` is truthy.
            recurrent: Forwarded to the parent constructor. When truthy, the
                first actor and critic layers take ``hidden_size`` inputs
                instead of ``num_inputs``.
            hidden_size: Width of every hidden layer and input width of
                ``critic_linear``.
        """
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

        first_layer_width = hidden_size if recurrent else num_inputs

        # `init` is defined outside this block; presumably it applies the
        # first callable to the weights and the second to the bias -- confirm.
        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        def scaled_init(module):
            return init(module, nn.init.orthogonal_, zero_fill, np.sqrt(2))

        def build_tanh_stack():
            # Two linear layers, each followed by Tanh.
            first = scaled_init(nn.Linear(first_layer_width, hidden_size))
            second = scaled_init(nn.Linear(hidden_size, hidden_size))
            return nn.Sequential(first, nn.Tanh(), second, nn.Tanh())

        # Actor is built before critic, so layer creation order is unchanged.
        self.actor = build_tanh_stack()
        self.critic = build_tanh_stack()

        self.critic_linear = scaled_init(nn.Linear(hidden_size, 1))

        self.train()