Example 1
    def __init__(self, num_inputs, recurrent=False, hidden_size=32):
        super(GridBase, self).__init__(recurrent, hidden_size, hidden_size)

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        self.main = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 8, 4, stride=2, padding=(7, 0))), nn.ReLU(),
            init_(nn.Conv2d(8, 16, 4, stride=2)), nn.ReLU(),
            init_(nn.Conv2d(16, 8, 3, stride=2)), nn.ReLU(), Flatten(),
            init_(nn.Linear(8 * 3 * 3, hidden_size)), nn.ReLU())

        # self.main = nn.Sequential(
        #     Flatten(),
        #     init_(nn.Linear(4 * 680, 1024)), nn.ReLU(),
        #     init_(nn.Linear(1024, 256)), nn.ReLU(),
        #     init_(nn.Linear(256, 64)), nn.ReLU(), 
        #     init_(nn.Linear(64, hidden_size)), nn.ReLU())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
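
All of these examples lean on a small init helper and a Flatten module that the snippets themselves never define. A minimal sketch consistent with how they are used (modeled on the widely used pytorch-a2c-ppo-acktr code; an assumption, since the definitions are not shown):

import torch.nn as nn

class Flatten(nn.Module):
    def forward(self, x):
        # collapse everything except the batch dimension
        return x.view(x.size(0), -1)

def init(module, weight_init, bias_init, gain=1):
    # apply the given initializers to the module's weight and bias in place
    weight_init(module.weight.data, gain=gain)
    bias_init(module.bias.data)
    return module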

Example 2

    def __init__(self, num_inputs, obs_shape, recurrent=False, hidden_size=64):
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        n = obs_shape[1]
        m = obs_shape[2]
        kernel_size = 2
        image_embedding_size = (
            (n-1)//kernel_size-kernel_size)*((m-1)//kernel_size-kernel_size)*64
        # NOTE: the convs here are not wrapped with init_, and num_inputs is
        # unused (obs_shape[0] supplies the channel count).
        self.main = nn.Sequential(
            nn.Conv2d(obs_shape[0], 16, kernel_size),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size),
            nn.Conv2d(16, 32, kernel_size),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size),
            nn.ReLU(),
            Flatten(),
            init_(nn.Linear(image_embedding_size, hidden_size)), nn.ReLU())

        # self.main = nn.Sequential(
        #     init_(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
        #     init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        #     init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(), Flatten(),
        #     init_(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
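
The image_embedding_size formula just traces the spatial arithmetic of the trunk: the 2x2 conv leaves n-1, the 2x2 max-pool halves it, and each of the two remaining 2x2 convs subtracts one more. For a 7x7 MiniGrid observation (a hypothetical input; the snippet does not fix the size):

n = m = 7          # hypothetical 7x7 observation
kernel_size = 2
# conv(2): 7 -> 6, maxpool(2): 6 -> 3, conv(2): 3 -> 2, conv(2): 2 -> 1
size = ((n - 1) // kernel_size - kernel_size) \
    * ((m - 1) // kernel_size - kernel_size) * 64
print(size)        # 1 * 1 * 64 = 64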

Example 3

    def __init__(self, num_inputs, recurrent=False, hidden_size=512):
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        self.main = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 1, stride=1)), nn.ReLU(),  # out: 128x128x32
            init_(nn.Conv2d(32, 32, 4, stride=2)), nn.ReLU(),  # out: 63x63x32
            init_(nn.Conv2d(32, 32, 5, stride=2)), nn.ReLU(),  # out: 30x30x32
            init_(nn.Conv2d(32, 32, 4, stride=2)), nn.ReLU(),  # out: 14x14x32
            init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),  # out: 6x6x64
            Flatten())

        self.linear = nn.Sequential(init_(nn.Linear(6*6*64, hidden_size)), nn.ReLU())

        # self.main = nn.Sequential(
        #     init_(nn.Conv2d(num_inputs, 32, 1, stride=1)), nn.ReLU(),
        #     init_(nn.Conv2d(32, 32, 3, stride=2)), nn.ReLU(),
        #     init_(nn.Conv2d(32, 64, 5, stride=2)), nn.ReLU(),
        #     init_(nn.Conv2d(64, 32, 5, stride=2)), nn.ReLU(), Flatten(),
        #     init_(nn.Linear(32 * 8 * 8, hidden_size)), nn.ReLU())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
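
Two styles of gain appear across these examples: nn.init.calculate_gain('relu') for the convolutional trunks and np.sqrt(2) for the MLP bases further below. They are the same number:

import numpy as np
import torch.nn as nn

assert nn.init.calculate_gain('relu') == np.sqrt(2)  # both equal 1.4142...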
Example 4
    def __init__(self, cfg, obs_space, action_space):
        num_inputs = obs_space[0]
        recurrent = cfg.recurrent
        hidden_size = cfg.hidden_size
        use_init = cfg.use_init

        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        if use_init:
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                   constant_(x, 0),
                                   nn.init.calculate_gain('relu'))
        else:
            init_ = lambda m: init_null(m, nn.init.orthogonal_, lambda x: nn.
                                        init.constant_(x, 0),
                                        nn.init.calculate_gain('relu'))

        self.main = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
            init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
            init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(), Flatten(),
            init_(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

        if use_init:
            init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                   constant_(x, 0))
        else:
            init_ = lambda m: init_null(m, nn.init.orthogonal_, lambda x: nn.
                                        init.constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
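
When use_init is false this example routes through an init_null helper that is not shown. A plausible sketch (hypothetical: it mirrors init's signature but leaves PyTorch's default initialization untouched):

def init_null(module, weight_init, bias_init, gain=1):
    # no-op initializer: ignore the arguments, keep the module's defaults
    return module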

Example 5

    def __init__(self,
                 num_inputs,
                 old_model,
                 recurrent=False,
                 hidden_size=512):
        super(CNNBaseNew, self).__init__(recurrent, hidden_size, hidden_size)

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))
        self.old_model = old_model
        self.main1 = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU())
        self.main2 = nn.Sequential(init_(nn.Conv2d(32, 64, 4, stride=2)),
                                   nn.ReLU())
        self.main3 = nn.Sequential(init_(nn.Conv2d(64, 32, 3, stride=1)),
                                   nn.ReLU(), Flatten())
        self.main4 = nn.Sequential(init_(nn.Linear(32 * 7 * 7, hidden_size)),
                                   nn.ReLU())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()

Example 6

    def __init__(self, num_inputs, recurrent=False, hidden_size=512):
        super(CNNBase64, self).__init__(recurrent, hidden_size, hidden_size)

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        # CNN for 64x64
        self.main = nn.Sequential(
            # input (3, 64, 64)
            init_(nn.Conv2d(num_inputs, 32, 6, stride=4, padding=1)),
            nn.ReLU(),
            # input (32, 16, 16)
            init_(nn.Conv2d(32, 64, 4, stride=2, padding=2)),
            nn.ReLU(),
            # input (64, 9, 9)
            init_(nn.Conv2d(64, 32, 3, stride=1)),
            nn.ReLU(),
            Flatten(),
            init_(nn.Linear(32 * 7 * 7, hidden_size)),
            nn.ReLU())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
Example 7
    def __init__(self,
                 num_inputs,
                 recurrent=False,
                 hidden_size=64,
                 est_beta_value=False):
        super(CNN_minigrid, self).__init__(recurrent, hidden_size, hidden_size)

        self.est_beta_value = est_beta_value

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        # NOTE: hard-codes 3 input channels; the num_inputs argument is unused.
        self.main = nn.Sequential(init_(nn.Conv2d(3, 16, (2, 2))), nn.ReLU(),
                                  nn.MaxPool2d((2, 2)),
                                  init_(nn.Conv2d(16, 32, (2, 2))), nn.ReLU(),
                                  init_(nn.Conv2d(32, 64, (2, 2))), nn.ReLU(),
                                  Flatten(), init_(nn.Linear(64, hidden_size)),
                                  nn.Tanh())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.beta_value_net = nn.Sequential(init_(nn.Linear(hidden_size, 1)),
                                            nn.Sigmoid())

        self.train()
Example 8
    def __init__(self,
                 num_inputs,
                 recurrent=False,
                 hidden_size=512,
                 est_beta_value=False):
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        self.est_beta_value = est_beta_value

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        self.main = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
            init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
            init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(), Flatten(),
            init_(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.beta_value_net = nn.Sequential(init_(nn.Linear(hidden_size, 1)),
                                            nn.Sigmoid())

        self.train()
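
The 32 * 7 * 7 flatten size in this (and several other) trunks falls out of the standard 84x84 Atari input, with floor((size - kernel) / stride) + 1 per unpadded conv. The input resolution is an assumption here; the snippets never state it:

size = 84
for kernel, stride in [(8, 4), (4, 2), (3, 1)]:
    size = (size - kernel) // stride + 1  # 84 -> 20 -> 9 -> 7
print(32 * size * size)  # 1568 input features for the final Linear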
Example 9
    def __init__(self, num_inputs, recurrent=False, hidden_size=512):
        super(KeyValueBase, self).__init__(recurrent, hidden_size, hidden_size)
        self.hidden_size = hidden_size
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        self.main = nn.Sequential(
            init_(nn.Conv2d(num_inputs-3, 32, 8, stride=4)), nn.ReLU(),
            init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
            init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(), Flatten(),
            init_(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())
        
        self.kv_extractor = nn.Sequential(
            init_(nn.Conv2d(1, 32, 8, stride=4)), nn.ReLU(),
            init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
            init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(), Flatten(),
            init_(nn.Linear(32 * 7 * 7, hidden_size * 2)), nn.ReLU())

        self.embedding_merge = nn.Sequential(
            init_(nn.Linear(2 * hidden_size, 700)), nn.ReLU(),
            init_(nn.Linear(700, hidden_size)), nn.ReLU())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
Example 10
    def __init__(self,
                 num_inputs,
                 recurrent=False,
                 hidden_size=512,
                 normalize=True):
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        self.conv1 = (nn.Conv2d(num_inputs, 32, 8, stride=4))
        self.conv2 = (nn.Conv2d(32, 64, 4, stride=2))
        self.conv3 = (nn.Conv2d(64, 64, 3, stride=1))
        self.fc1 = (nn.Linear(32 * 7 * 7 * 2, hidden_size))  # 32*7*7*2 == 64*7*7, conv3's flattened output
        self.relu = nn.ReLU()
        self.flatten = Flatten()
        # NOTE: init_ is defined below but never applied in this snippet, so
        # every layer keeps PyTorch's default initialization.
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = nn.Linear(hidden_size, 1)
        self.normalize = normalize

        self.train()
Example 11
    def __init__(self, num_inputs, recurrent=False, hidden_size=512):
        """
        self.main + self.critic_learner = actor
        self.train + self.dist = critic
        :param num_inputs:
        :param recurrent:
        :param hidden_size:
        """
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        # weight, bias initialization
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))
        self.main = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
            init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
            init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(), Flatten(),
            init_(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())
        # print(self.main)

        # weight, bias initialization
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))
        self.critic_linear = init_(nn.Linear(hidden_size, 1))
        # print(self.critic_linear)

        self.train()  # sets the module in training mode.

Example 12

    def __init__(self,
                 in_features,
                 out_features,
                 bias=True,
                 init_type=3,
                 can_split=True,
                 actv_fn='relu',
                 has_bn=False):

        super().__init__(can_split=can_split, actv_fn=actv_fn, has_bn=has_bn)

        self.has_bias = bias
        if has_bn:
            self.bn = nn.BatchNorm1d(out_features)
            self.has_bias = False

        init1_ = lambda m: init(m,
                                nn.init.orthogonal_,
                                lambda x: nn.init.constant_(x, 0),
                                gain=0.01)

        init2_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                constant_(x, 0))
        init3_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                constant_(x, 0), np.sqrt(2))

        self.module = nn.Linear(in_features, out_features, self.has_bias)
        init_dict = {
            1: init1_,
            2: init2_,
            3: init3_,
        }
        init_dict[init_type](self.module)
Example 13
    def __init__(self, num_inputs, recurrent=True, hidden_size=512):
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        init_ = lambda m: init(m,
            nn.init.orthogonal_,
            lambda x: nn.init.constant_(x, 0),
            nn.init.calculate_gain('relu'))

        self.main = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)),
            nn.ELU(),
            init_(nn.Conv2d(32, 32, 3, stride=2, padding=1)),
            nn.ELU(),
            init_(nn.Conv2d(32, 32, 3, stride=2, padding=1)),
            nn.ELU(),
            init_(nn.Conv2d(32, 32, 3, stride=2, padding=1)),
            nn.ELU(),
            Flatten(),
            init_(nn.Linear(32 * 6 * 6, hidden_size)),
            nn.ReLU()
        )

        init_ = lambda m: init(m,
            nn.init.orthogonal_,
            lambda x: nn.init.constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()

Example 14

    def __init__(self, occ_num_inputs, sign_num_inputs, recurrent):

        combined_size = occ_num_inputs[0] * 16 * 5 + sign_num_inputs

        hidden_size = int(np.power(2, np.floor(np.log2(combined_size))))

        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        self.lane = nn.Sequential(init_(nn.Conv1d(1, 8, 6, stride=1)),
                                  nn.ReLU(), nn.MaxPool1d(4),
                                  init_(nn.Conv1d(8, 16, 6, stride=1)),
                                  nn.ReLU(), nn.MaxPool1d(5))

        self.actor = nn.Sequential(
            init_(nn.Linear(combined_size, hidden_size)), nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

        self.critic = nn.Sequential(
            init_(nn.Linear(combined_size, hidden_size)), nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
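
The hidden_size expression above rounds combined_size down to the nearest power of two. With hypothetical inputs occ_num_inputs[0] = 2 and sign_num_inputs = 10:

import numpy as np

combined_size = 2 * 16 * 5 + 10  # 170, per the formula above
hidden_size = int(np.power(2, np.floor(np.log2(combined_size))))
print(hidden_size)  # 128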
Example 15
    def __init__(self, obs_shape, recurrent=False, hidden_size=512):
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)
        num_inputs = obs_shape[0]

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        self.main = nn.Sequential(  # input: 128 x 128
            init_(nn.Conv2d(num_inputs, 32, 3, stride=2)),  # 63 x 63
            nn.ReLU(),
            init_(nn.Conv2d(32, 48, 3, stride=2)),  # 31 x 31
            nn.ReLU(),
            init_(nn.Conv2d(48, 64, 3, stride=2)),  # 15 x 15
            nn.ReLU(),
            init_(nn.Conv2d(64, 128, 3, stride=2)),  # 7 x 7
            nn.ReLU(),
            init_(nn.Conv2d(128, 64, 3, stride=1)),  # 5 x 5
            nn.ReLU(),
            Flatten(),
            init_(nn.Linear(64 * 5 * 5, hidden_size)),
            nn.ReLU())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
Example 16
    def __init__(self,
                 num_inputs,
                 vector_obs_len=0,
                 recurrent=False,
                 hidden_size=512):
        super(CNNBase, self).__init__(recurrent, hidden_size + vector_obs_len,
                                      hidden_size)

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), nn.init.calculate_gain('relu'))

        self.main = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
            init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
            init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(), Flatten(),
            init_(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        if recurrent:
            self.critic_linear = init_(nn.Linear(hidden_size, 1))
        else:
            self.critic_linear = init_(
                nn.Linear(hidden_size + vector_obs_len, 1))

        self.train()
Example 17
    def __init__(self,
                 num_inputs,
                 input_size,
                 action_space,
                 hidden_size=512,
                 embed_size=0,
                 recurrent=False,
                 device='cpu'):

        super(CNNBase, self).__init__(recurrent, num_inputs, hidden_size,
                                      embed_size)

        self.device = device
        self.action_space = action_space

        h, w = input_size
        self.conv1 = nn.Conv2d(num_inputs, 32, kernel_size=8, stride=4)
        w_out = conv2d_size_out(w, kernel_size=8, stride=4)
        h_out = conv2d_size_out(h, kernel_size=8, stride=4)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        w_out = conv2d_size_out(w_out, kernel_size=4, stride=2)
        h_out = conv2d_size_out(h_out, kernel_size=4, stride=2)

        self.conv3 = nn.Conv2d(64, 32, kernel_size=3, stride=1)
        w_out = conv2d_size_out(w_out, kernel_size=3, stride=1)
        h_out = conv2d_size_out(h_out, kernel_size=3, stride=1)

        init_cnn_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                   constant_(x, 0),
                                   nn.init.calculate_gain('relu'))

        self.cnn_trunk = nn.Sequential(
            init_cnn_(self.conv1), nn.ReLU(), init_cnn_(self.conv2), nn.ReLU(),
            init_cnn_(self.conv3), nn.ReLU(), Flatten(),
            init_cnn_(nn.Linear(32 * h_out * w_out, hidden_size)), nn.ReLU())

        init__ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                constant_(x, 0), np.sqrt(2))

        self.trunk = nn.Sequential(
            init__(
                nn.Linear(hidden_size + self.action_space.n + embed_size,
                          hidden_size // 2)), nn.Tanh(),
            init__(nn.Linear(hidden_size // 2, hidden_size // 2)), nn.Tanh(),
            init__(nn.Linear(hidden_size // 2, 1)))

        # self.optimizer = torch.optim.Adam(self.parameters(), lr=3e-5)
        self.optimizer = torch.optim.RMSprop(
            self.parameters(), lr=5e-5
        )  # To be consistent with the WGAN optimizer, although not necessary

        self.returns = None
        self.ret_rms = RunningMeanStd(shape=())

        # NOTE: this binds a local variable only; self.device (set above) is unchanged.
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
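
This example computes the flatten size with a conv2d_size_out helper it does not define. A minimal sketch, assuming the standard output-size formula for an unpadded convolution:

def conv2d_size_out(size, kernel_size, stride):
    # spatial size after an unpadded convolution
    return (size - kernel_size) // stride + 1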
Example 18
    def __init__(self, num_inputs, num_outputs, zll=False):
        super(Beta, self).__init__()

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        init_zeros = lambda m: init(
            m, lambda x, **kwargs: nn.init.constant_(x, 0), lambda x: nn.init.
            constant_(x, 0))

        init_last_layer = init_zeros if zll else init_

        self.alpha_linear = init_last_layer(nn.Linear(num_inputs, num_outputs))
        self.beta_linear = init_last_layer(nn.Linear(num_inputs, num_outputs))
Example 19
    def __init__(self, num_inputs, num_outputs, zll=False):
        super(DiagGaussian, self).__init__()

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        init_zeros = lambda m: init(
            m, lambda x, **kwargs: nn.init.constant_(x, 0), lambda x: nn.init.
            constant_(x, 0))

        init_last_layer = init_zeros if zll else init_

        self.fc_mean = init_last_layer(nn.Linear(num_inputs, num_outputs))
        self.logstd = AddBias(torch.zeros(num_outputs))
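
DiagGaussian stores the log-std in an AddBias module that is not defined here. A sketch in the spirit of the pytorch-a2c-ppo-acktr implementation (an assumption, since the class is not shown):

import torch
import torch.nn as nn

class AddBias(nn.Module):
    def __init__(self, bias):
        super(AddBias, self).__init__()
        # keep the bias as a learnable column vector
        self._bias = nn.Parameter(bias.unsqueeze(1))

    def forward(self, x):
        if x.dim() == 2:
            bias = self._bias.t().view(1, -1)
        else:
            bias = self._bias.t().view(1, -1, 1, 1)
        return x + bias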
Example 20
    def __init__(
        self,
        num_inputs,
        recurrent=False,
        hidden_size=512,
        fc_size=0,
        deep=False,
        conv=None,
    ):
        """ num inputs is the number of channels """
        super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

        init_ = lambda m: init(
            m,
            nn.init.orthogonal_,
            lambda x: nn.init.constant_(x, 0),
            nn.init.calculate_gain("relu"),
        )
        if conv:
            self.main = conv
        else:
            self.main = nn.Sequential(
                init_(nn.Conv2d(num_inputs, 32, 8, stride=4)),
                nn.ReLU(),
                init_(nn.Conv2d(32, 64, 4, stride=2)),
                nn.ReLU(),
                init_(nn.Conv2d(64, 32, 3, stride=1)),
                nn.ReLU(),
                Flatten(),
            )

        if deep:
            self.fc = nn.Sequential(
                init_(nn.Linear(32 * 7 * 7 + fc_size, hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
            )
        else:
            self.fc = nn.Sequential(
                init_(nn.Linear(32 * 7 * 7 + fc_size, hidden_size)), nn.ReLU())
        self.fc_size = fc_size if fc_size else None

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
Example 21
    def __init__(self,
                 num_inputs,
                 input_size,
                 action_space,
                 hidden_size=512,
                 recurrent=False,
                 device='cpu'):

        super(CNNBase, self).__init__(recurrent, num_inputs, hidden_size)

        self.device = device
        self.action_space = action_space

        h, w = input_size
        self.conv1 = nn.Conv2d(num_inputs, 32, kernel_size=8, stride=4)
        w_out = conv2d_size_out(w, kernel_size=8, stride=4)
        h_out = conv2d_size_out(h, kernel_size=8, stride=4)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        w_out = conv2d_size_out(w_out, kernel_size=4, stride=2)
        h_out = conv2d_size_out(h_out, kernel_size=4, stride=2)

        self.conv3 = nn.Conv2d(64, 32, kernel_size=3, stride=1)
        w_out = conv2d_size_out(w_out, kernel_size=3, stride=1)
        h_out = conv2d_size_out(h_out, kernel_size=3, stride=1)

        init_cnn_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                   constant_(x, 0),
                                   nn.init.calculate_gain('relu'))

        self.cnn_trunk = nn.Sequential(
            init_cnn_(self.conv1), nn.ReLU(), init_cnn_(self.conv2), nn.ReLU(),
            init_cnn_(self.conv3), nn.ReLU(), Flatten(),
            init_cnn_(nn.Linear(32 * h_out * w_out, hidden_size)), nn.ReLU())

        init__ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                constant_(x, 0), np.sqrt(2))

        self.trunk = nn.Sequential(
            init__(
                nn.Linear(hidden_size + self.action_space.n,
                          hidden_size // 2)), nn.Tanh(),
            init__(nn.Linear(hidden_size // 2, hidden_size // 2)), nn.Tanh(),
            init__(nn.Linear(hidden_size // 2, 1)))

        self.optimizer = torch.optim.Adam(self.parameters())

        self.returns = None
        self.ret_rms = RunningMeanStd(shape=())
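
Several of these critics keep a RunningMeanStd over the returns. A compact sketch following the parallel-variance update popularized by OpenAI Baselines (assumed; the class itself is not shown in these snippets):

import numpy as np

class RunningMeanStd:
    def __init__(self, epsilon=1e-4, shape=()):
        self.mean = np.zeros(shape, "float64")
        self.var = np.ones(shape, "float64")
        self.count = epsilon

    def update(self, x):
        # Chan et al. parallel mean/variance update over a batch
        batch_mean, batch_var = x.mean(axis=0), x.var(axis=0)
        batch_count = x.shape[0]
        delta = batch_mean - self.mean
        total = self.count + batch_count
        new_mean = self.mean + delta * batch_count / total
        m2 = (self.var * self.count + batch_var * batch_count
              + delta ** 2 * self.count * batch_count / total)
        self.mean, self.var, self.count = new_mean, m2 / total, total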
Example 22
    def __init__(self, obs_shape, recurrent=False, hidden_size=64):
        num_inputs = obs_shape[0]
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

        if recurrent:
            num_inputs = hidden_size

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), np.sqrt(2))

        self.actor = nn.Sequential(init_(nn.Linear(num_inputs, hidden_size)),
                                   nn.Tanh(),
                                   init_(nn.Linear(hidden_size, hidden_size)),
                                   nn.Tanh(),
                                   init_(nn.Linear(hidden_size, hidden_size)),
                                   nn.Tanh())

        self.critic = nn.Sequential(init_(nn.Linear(num_inputs, hidden_size)),
                                    nn.Tanh(),
                                    init_(nn.Linear(hidden_size, hidden_size)),
                                    nn.Tanh(),
                                    init_(nn.Linear(hidden_size, hidden_size)),
                                    nn.Tanh())

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
Example 23
    def __init__(self, num_inputs, num_outputs):
        super(Bernoulli, self).__init__()

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        self.linear = init_(nn.Linear(num_inputs, num_outputs))
Example 24
    def __init__(self,
                 obs_space,
                 obs_process,
                 obs_module,
                 action_space,
                 base_kwargs=None):
        super(Policy, self).__init__()
        self.obs_space = obs_space
        self.obs_process = obs_process
        self.obs_module = obs_module

        if base_kwargs is None:
            base_kwargs = {}

        # base takes all of the observations and produces a single feature vector
        self.base = NNBase2(obs_space, obs_process, obs_module, **base_kwargs)

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))
        self.critic_linear = init_(nn.Linear(self.base.output_size, 1))

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError
Example 25
    def __init__(self, num_inputs, recurrent=False, hidden_size=64):
        super(MLPHardAttnBase, self).__init__(recurrent, num_inputs,
                                              hidden_size)

        num_obs_input = num_inputs

        if recurrent:
            num_inputs = hidden_size

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), np.sqrt(2))

        self.input_attention = nn.Parameter(torch.zeros(num_obs_input),
                                            requires_grad=True)

        self.actor = nn.Sequential(init_(nn.Linear(num_inputs, hidden_size)),
                                   nn.Tanh(),
                                   init_(nn.Linear(hidden_size, hidden_size)),
                                   nn.Tanh())

        self.critic = nn.Sequential(init_(nn.Linear(num_inputs, hidden_size)),
                                    nn.Tanh(),
                                    init_(nn.Linear(hidden_size, hidden_size)),
                                    nn.Tanh())

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.train()
Example 26
    def __init__(self, recurrent, recurrent_input_size, hidden_size, attention=0):
        super(NNBase, self).__init__()
        
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), np.sqrt(2))

        self._hidden_size = hidden_size
        self._recurrent = recurrent
        self._attention = attention
        
        if recurrent:
            self.gru = nn.GRU(recurrent_input_size, hidden_size)
            for name, param in self.gru.named_parameters():
                if 'bias' in name:
                    nn.init.constant_(param, 0)
                elif 'weight' in name:
                    nn.init.orthogonal_(param)

        if attention:
            self.mhat_b = MHAT2(28, 15, hidden_size // 2, hidden_size // 2, attention) # q k h out head
            self.mhat_f = MHAT2(28, 7, hidden_size // 2, hidden_size // 2, attention)
            self.mhat_v = MHAT2(28, 5, hidden_size // 2, hidden_size // 2, attention)
            self.mhat_o = MHAT2(28, 15, hidden_size // 2, hidden_size // 2, attention) # q k h out head
            self.mhat_p = MHAT2(28, 15, hidden_size // 2, hidden_size // 2, attention) # q k h out head
            self.em = nn.Sequential(init_(nn.Linear(28 + hidden_size // 2 * 5, recurrent_input_size)), nn.ReLU())
Example 27
    def __init__(self,
                 num_inputs,
                 input_size,
                 action_space,
                 hidden_size=64,
                 recurrent=False,
                 device='cpu'):
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

        self.device = device

        if recurrent:
            num_inputs = hidden_size

        init__ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                constant_(x, 0), np.sqrt(2))

        self.trunk = nn.Sequential(
            init__(nn.Linear(num_inputs + action_space.shape[0], hidden_size)),
            nn.Tanh(), init__(nn.Linear(hidden_size, hidden_size)), nn.Tanh(),
            init__(nn.Linear(hidden_size, 1)))

        self.optimizer = torch.optim.Adam(self.parameters())

        self.returns = None
        self.ret_rms = RunningMeanStd(shape=())

        self.train()
Example 28
    def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
        #def __init__(self, obs_shape, action_space,action_space2, base=None, base_kwargs=None):
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}
        # if base is None:
        #     if len(obs_shape) == 3:
        #         base = CNNBase
        #     elif len(obs_shape) == 1:
        #         base = MLPBase
        #     else:
        #         raise NotImplementedError
        #base = base
        self.base = base  #base(obs_shape[0], **base_kwargs)

        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            #num_outputs2 = action_space2.n
            self.dist = Categorical(self.base.output_size, num_outputs)
            #self.dist2 = Categorical(self.base.output_size, num_outputs2)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif action_space.__class__.__name__ == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        # NOTE: the hard-coded 512 assumes base.output_size == 512.
        self.critic_linear = init_(nn.Linear(512, 1))
Example 29
    def __init__(self,
                 num_inputs,
                 recurrent=False,
                 hidden_size=64,
                 est_beta_value=False):
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

        self.est_beta_value = est_beta_value

        if recurrent:
            num_inputs = hidden_size

        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0), np.sqrt(2))

        self.actor = nn.Sequential(init_(nn.Linear(num_inputs, hidden_size)),
                                   nn.Tanh(),
                                   init_(nn.Linear(hidden_size, hidden_size)),
                                   nn.Tanh())

        self.critic = nn.Sequential(init_(nn.Linear(num_inputs, hidden_size)),
                                    nn.Tanh(),
                                    init_(nn.Linear(hidden_size, hidden_size)),
                                    nn.Tanh())

        self.critic_linear = init_(nn.Linear(hidden_size, 1))

        self.beta_value_net = nn.Sequential(init_(nn.Linear(hidden_size, 1)),
                                            nn.Sigmoid())
        #self.critic_linear = nn.Sequential(init_(nn.Linear(hidden_size, 1)), nn.Sigmoid())

        self.train()
Example 30
    def __init__(self,
                 num_inputs,
                 input_size,
                 action_space,
                 hidden_size=64,
                 embed_size=0,
                 recurrent=False,
                 device='cpu'):
        super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size,
                                      embed_size)

        self.device = device

        if recurrent:
            num_inputs = hidden_size

        init__ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                                constant_(x, 0), np.sqrt(2))

        self.trunk = nn.Sequential(
            init__(
                nn.Linear(num_inputs + action_space.shape[0] + embed_size,
                          hidden_size)), nn.Tanh(),
            init__(nn.Linear(hidden_size, hidden_size)), nn.Tanh(),
            init__(nn.Linear(hidden_size, 1)))

        # self.optimizer = torch.optim.Adam(self.parameters(), lr= 3e-5)
        self.optimizer = torch.optim.RMSprop(self.parameters(), lr=5e-5)

        # NOTE: this binds a local variable only; self.device (set above) is unchanged.
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        self.returns = None
        self.ret_rms = RunningMeanStd(shape=())

        self.train()