Example #1
0
    def __init__(self, num_inputs, recurrent=False, hidden_size=64):
        """Build the actor tower, the critic tower and the value head.

        Both towers are stacks of eight ``nn.Linear`` + ``nn.Tanh`` pairs
        with identical widths; ``critic_linear`` maps ``hidden_size``
        features to a single output.

        Args:
            num_inputs: Input feature width. Forwarded to the parent
                constructor and used as the towers' input width unless
                ``recurrent`` is truthy.
            recurrent: Forwarded to the parent constructor. When truthy the
                towers are built with ``hidden_size`` input features.
            hidden_size: Base layer width; the towers use 3x, 2x and 1x
                multiples of it.
        """
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

        # In recurrent mode the towers are sized for hidden_size-wide input.
        tower_in = hidden_size if recurrent else num_inputs

        wide = hidden_size * 3
        mid = hidden_size * 2
        # (in_features, out_features) of every Linear layer, in stack order.
        layer_dims = [
            (tower_in, wide),
            (wide, wide),
            (wide, wide),
            (wide, wide),
            (wide, wide),
            (wide, mid),
            (mid, hidden_size),
            (hidden_size, hidden_size),
        ]

        def zero_bias(x):
            return nn.init.constant_(x, 0)

        def init_(module):
            # `init` is defined outside this block; it is handed the module,
            # the orthogonal weight initialiser, the zero-bias initialiser
            # and a gain of sqrt(2).
            return init(module, nn.init.orthogonal_, zero_bias, np.sqrt(2))

        def build_tower():
            # Layers are created strictly in stack order so the sequence of
            # random initialisations matches a hand-written Sequential.
            layers = []
            for n_in, n_out in layer_dims:
                layers.append(init_(nn.Linear(n_in, n_out)))
                layers.append(nn.Tanh())
            return nn.Sequential(*layers)

        self.actor = build_tower()
        self.critic = build_tower()

        self.critic_linear = init_(nn.Linear(hidden_size, 1))
        self.train()
    def __init__(self, num_inputs, num_outputs):
        """Create the single linear layer used by this module.

        Args:
            num_inputs: Number of input features of the linear layer.
            num_outputs: Number of output features of the linear layer.
        """
        super(Bernoulli, self).__init__()

        def zero_bias(x):
            return nn.init.constant_(x, 0)

        def init_(module):
            # `init` is defined outside this block; it receives the module,
            # the orthogonal weight initialiser and the zero-bias initialiser
            # (no explicit gain is passed).
            return init(module, nn.init.orthogonal_, zero_bias)

        projection = nn.Linear(num_inputs, num_outputs)
        self.linear = init_(projection)
    def __init__(self, num_inputs, num_outputs):
        """Create the mean layer and the ``logstd`` bias module.

        Args:
            num_inputs: Number of input features of ``fc_mean``.
            num_outputs: Number of output features of ``fc_mean`` and the
                length of the zero vector handed to ``AddBias``.
        """
        super(DiagGaussian, self).__init__()

        def zero_bias(x):
            return nn.init.constant_(x, 0)

        def init_(module):
            # `init` is defined outside this block; it receives the module,
            # the orthogonal weight initialiser and the zero-bias initialiser
            # (no explicit gain is passed).
            return init(module, nn.init.orthogonal_, zero_bias)

        mean_layer = nn.Linear(num_inputs, num_outputs)
        self.fc_mean = init_(mean_layer)

        # AddBias is a project helper defined elsewhere; it is seeded with
        # an all-zero vector of length num_outputs.
        initial_logstd = torch.zeros(num_outputs)
        self.logstd = AddBias(initial_logstd)
Example #4
0
    def __init__(self, num_inputs, recurrent=False, hidden_size=512):
        """Build the convolutional feature stack and the value head.

        ``main`` is three ``nn.Conv2d`` + ``nn.ReLU`` stages, a ``Flatten``,
        and a ``nn.Linear(32 * 7 * 7, hidden_size)`` + ``nn.ReLU``.
        ``critic_linear`` maps ``hidden_size`` features to one output.

        Args:
            num_inputs: Number of input channels of the first convolution.
            recurrent: Forwarded to the parent constructor.
            hidden_size: Output width of ``main``; also forwarded twice to
                the parent constructor.
        """
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        def zero_bias(x):
            return nn.init.constant_(x, 0)

        def relu_init(module):
            # Orthogonal weights with the gain PyTorch reports for 'relu'.
            return init(module, nn.init.orthogonal_, zero_bias,
                        nn.init.calculate_gain('relu'))

        def plain_init(module):
            # Same initialisers, but no explicit gain is passed to `init`.
            return init(module, nn.init.orthogonal_, zero_bias)

        # The list literal is evaluated left to right, so the layers are
        # created in exactly the order they appear in the stack.
        feature_layers = [
            relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)),
            nn.ReLU(),
            relu_init(nn.Conv2d(32, 64, 4, stride=2)),
            nn.ReLU(),
            relu_init(nn.Conv2d(64, 32, 3, stride=1)),
            nn.ReLU(),
            Flatten(),
            relu_init(nn.Linear(32 * 7 * 7, hidden_size)),
            nn.ReLU(),
        ]
        self.main = nn.Sequential(*feature_layers)

        self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

        self.train()
    def __init__(self, num_inputs, num_outputs):
        """Create the single linear layer used by this module.

        Args:
            num_inputs: Number of input features of the linear layer.
            num_outputs: Number of output features of the linear layer.
        """
        super(Categorical, self).__init__()

        def zero_bias(x):
            return nn.init.constant_(x, 0)

        def init_(module):
            # `init` is defined outside this block; it receives the module,
            # the orthogonal weight initialiser, the zero-bias initialiser
            # and a small gain of 0.01 passed by keyword.
            return init(module, nn.init.orthogonal_, zero_bias, gain=0.01)

        projection = nn.Linear(num_inputs, num_outputs)
        self.linear = init_(projection)
Example #6
0
    def __init__(self, num_inputs, recurrent=False, hidden_size=256):
        """Build the shared conv trunk and the mask, actor and critic heads.

        Layer sizes are read from the module-level ``config`` object
        (``channel``, ``pallet_size``, ``container_size``,
        ``enable_rotation``), which is defined outside this block.

        Args:
            num_inputs: Forwarded to the parent constructor; the first
                convolution takes its channel count from ``config.channel``.
            recurrent: Forwarded to the parent constructor.
            hidden_size: Width of the fully connected layer in each head and
                input width of ``critic_linear``.
        """
        super(CNNPro, self).__init__(recurrent, num_inputs, hidden_size)

        def zero_bias(x):
            return nn.init.constant_(x, 0)

        def init_(module):
            # Orthogonal weights with the gain PyTorch reports for 'relu';
            # used for every layer in this constructor, value head included.
            return init(module, nn.init.orthogonal_, zero_bias,
                        nn.init.calculate_gain('relu'))

        # Shared trunk: five 3x3, stride-1, padding-1 convolutions with 64
        # output channels each, every one followed by a ReLU.
        trunk = []
        for in_channels in [config.channel, 64, 64, 64, 64]:
            trunk.append(
                init_(nn.Conv2d(in_channels, 64, 3, stride=1, padding=1)))
            trunk.append(nn.ReLU())
        self.share = nn.Sequential(*trunk)

        # Output width of the mask head; doubled when rotation is enabled.
        pred_len = config.container_size[0] * config.container_size[1]
        if config.enable_rotation:
            pred_len *= 2

        def head_layers(channels):
            # 1x1 conv to `channels` maps -> ReLU -> Flatten -> Linear to
            # hidden_size -> ReLU. The Linear input width assumes the
            # feature map is pallet_size x pallet_size at this point.
            return [
                init_(nn.Conv2d(64, channels, 1, stride=1)),
                nn.ReLU(),
                Flatten(),
                init_(
                    nn.Linear(
                        channels * config.pallet_size * config.pallet_size,
                        hidden_size)),
                nn.ReLU(),
            ]

        # The mask head adds a projection to pred_len and ends in a ReLU.
        # NOTE: nn.Sigmoid() was left disabled as the final activation.
        mask_layers = head_layers(8) + [
            init_(nn.Linear(hidden_size, pred_len)),
            nn.ReLU(),
        ]
        self.mask = nn.Sequential(*mask_layers)

        self.actor = nn.Sequential(*head_layers(8))

        self.critic = nn.Sequential(*head_layers(4))
        self.critic_linear = init_(nn.Linear(hidden_size, 1))
        self.train()