def __init__(self, num_inputs, recurrent=False, hidden_size=32):
    """Small conv trunk for grid observations plus a scalar critic head."""
    super(GridBase, self).__init__(recurrent, hidden_size, hidden_size)

    # Orthogonal init with ReLU gain for the feature trunk.
    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        relu_init(nn.Conv2d(num_inputs, 8, 4, stride=2, padding=(7, 0))),
        nn.ReLU(),
        relu_init(nn.Conv2d(8, 16, 4, stride=2)),
        nn.ReLU(),
        relu_init(nn.Conv2d(16, 8, 3, stride=2)),
        nn.ReLU(),
        Flatten(),
        relu_init(nn.Linear(8 * 3 * 3, hidden_size)),
        nn.ReLU())

    # Critic head: orthogonal init without the ReLU gain.
    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, obs_shape, recurrent=False, hidden_size=64):
    """Conv trunk sized from the observation shape (channels, n, m).

    Only the final linear layer is orthogonally initialized; the conv
    layers keep PyTorch defaults, matching the original.
    """
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    n = obs_shape[1]
    m = obs_shape[2]
    kernel_size = 2
    # Flattened feature count after conv/pool/conv/conv with this kernel.
    image_embedding_size = (
        (n - 1) // kernel_size - kernel_size) * (
        (m - 1) // kernel_size - kernel_size) * 64

    self.main = nn.Sequential(
        nn.Conv2d(obs_shape[0], 16, kernel_size),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size),
        nn.Conv2d(16, 32, kernel_size),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size),
        nn.ReLU(),
        Flatten(),
        relu_init(nn.Linear(image_embedding_size, hidden_size)),
        nn.ReLU())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, recurrent=False, hidden_size=512):
    """Five-conv trunk for 128x128 inputs; flatten and linear head are split."""
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        relu_init(nn.Conv2d(num_inputs, 32, 1, stride=1)), nn.ReLU(),  # out: 128x128x32
        relu_init(nn.Conv2d(32, 32, 4, stride=2)), nn.ReLU(),          # out: 63x63x32
        relu_init(nn.Conv2d(32, 32, 5, stride=2)), nn.ReLU(),          # out: 30x30x32
        relu_init(nn.Conv2d(32, 32, 4, stride=2)), nn.ReLU(),          # out: 14x14x32
        relu_init(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),          # out: 6x6x64
        Flatten())
    self.linear = nn.Sequential(
        relu_init(nn.Linear(6 * 6 * 64, hidden_size)),
        nn.ReLU())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, cfg, obs_space, action_space):
    """Atari-style trunk configured from `cfg`; `cfg.use_init` toggles
    orthogonal initialization (otherwise `init_null` is used)."""
    num_inputs = obs_space[0]
    recurrent = cfg.recurrent
    hidden_size = cfg.hidden_size
    use_init = cfg.use_init
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    zero_bias = lambda x: nn.init.constant_(x, 0)
    relu_gain = nn.init.calculate_gain('relu')
    if use_init:
        wrap = lambda m: init(m, nn.init.orthogonal_, zero_bias, relu_gain)
    else:
        wrap = lambda m: init_null(m, nn.init.orthogonal_, zero_bias, relu_gain)

    self.main = nn.Sequential(
        wrap(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
        wrap(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        wrap(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        wrap(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

    # Critic head: same toggle, no ReLU gain.
    if use_init:
        head_wrap = lambda m: init(m, nn.init.orthogonal_, zero_bias)
    else:
        head_wrap = lambda m: init_null(m, nn.init.orthogonal_, zero_bias)
    self.critic_linear = head_wrap(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, old_model, recurrent=False, hidden_size=512):
    """Atari trunk split into four stages (main1..main4) so intermediate
    activations can be tapped; keeps a reference to `old_model`."""
    super(CNNBaseNew, self).__init__(recurrent, hidden_size, hidden_size)

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.old_model = old_model
    self.main1 = nn.Sequential(
        relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU())
    self.main2 = nn.Sequential(
        relu_init(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU())
    self.main3 = nn.Sequential(
        relu_init(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(), Flatten())
    self.main4 = nn.Sequential(
        relu_init(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, recurrent=False, hidden_size=512):
    """Conv trunk sized for 64x64 inputs (padding chosen to land on 7x7)."""
    super(CNNBase64, self).__init__(recurrent, hidden_size, hidden_size)

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        # input (3, 64, 64)
        relu_init(nn.Conv2d(num_inputs, 32, 6, stride=4, padding=1)),
        nn.ReLU(),
        # input (3, 16, 16)
        relu_init(nn.Conv2d(32, 64, 4, stride=2, padding=2)),
        nn.ReLU(),
        # input (3, 9, 9)
        relu_init(nn.Conv2d(64, 32, 3, stride=1)),
        nn.ReLU(),
        Flatten(),
        relu_init(nn.Linear(32 * 7 * 7, hidden_size)),
        nn.ReLU())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, recurrent=False, hidden_size=64,
             est_beta_value=False):
    """Minigrid conv trunk with an optional sigmoid beta-value head.

    NOTE(review): the first conv hard-codes 3 input channels and ignores
    `num_inputs` - confirm this matches the minigrid observation format.
    """
    super(CNN_minigrid, self).__init__(recurrent, hidden_size, hidden_size)
    self.est_beta_value = est_beta_value

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        relu_init(nn.Conv2d(3, 16, (2, 2))), nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        relu_init(nn.Conv2d(16, 32, (2, 2))), nn.ReLU(),
        relu_init(nn.Conv2d(32, 64, (2, 2))), nn.ReLU(),
        Flatten(),
        relu_init(nn.Linear(64, hidden_size)), nn.Tanh())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))
    # Sigmoid output: beta value is interpreted as a probability-like scalar.
    self.beta_value_net = nn.Sequential(
        plain_init(nn.Linear(hidden_size, 1)), nn.Sigmoid())

    self.train()
def __init__(self, num_inputs, recurrent=False, hidden_size=512,
             est_beta_value=False):
    """Atari conv trunk with critic head and optional beta-value head."""
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)
    self.est_beta_value = est_beta_value

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
        relu_init(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        relu_init(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        relu_init(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))
    self.beta_value_net = nn.Sequential(
        plain_init(nn.Linear(hidden_size, 1)), nn.Sigmoid())

    self.train()
def __init__(self, num_inputs, recurrent=False, hidden_size=512):
    """Two-trunk base: `main` consumes all but 3 input channels, while
    `kv_extractor` embeds a single-channel map into a key/value pair
    (2*hidden_size) that `embedding_merge` folds back to hidden_size."""
    super(KeyValueBase, self).__init__(recurrent, hidden_size, hidden_size)
    self.hidden_size = hidden_size

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        relu_init(nn.Conv2d(num_inputs - 3, 32, 8, stride=4)), nn.ReLU(),
        relu_init(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        relu_init(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        relu_init(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

    self.kv_extractor = nn.Sequential(
        relu_init(nn.Conv2d(1, 32, 8, stride=4)), nn.ReLU(),
        relu_init(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        relu_init(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        relu_init(nn.Linear(32 * 7 * 7, hidden_size * 2)), nn.ReLU())

    self.embedding_merge = nn.Sequential(
        relu_init(nn.Linear(2 * hidden_size, 700)), nn.ReLU(),
        relu_init(nn.Linear(700, hidden_size)), nn.ReLU())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, recurrent=False, hidden_size=512,
             normalize=True):
    """Atari-style conv layers exposed individually plus a scalar critic head.

    Fixes over the original: the two orthogonal-`init` lambdas were created
    but never applied to any layer (dead locals, removed), and
    `self.critic_linear` was constructed twice with the first instance
    immediately discarded (now built once). All layers therefore keep
    PyTorch's default initialization, unlike the sibling bases - possibly
    an oversight in the original; confirm before "fixing" further.
    """
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 64, 3, stride=1)
    # NOTE(review): expects 32*7*7*2 flattened features even though conv3
    # emits 64 channels - verify against the forward pass.
    self.fc1 = nn.Linear(32 * 7 * 7 * 2, hidden_size)
    self.relu = nn.ReLU()
    self.flatten = Flatten()
    self.critic_linear = nn.Linear(hidden_size, 1)
    self.normalize = normalize
    self.train()
def __init__(self, num_inputs, recurrent=False, hidden_size=512):
    """Standard Atari actor-critic base.

    `main` + `critic_linear` form the critic path; the actor distribution
    is attached elsewhere.

    :param num_inputs: number of input channels
    :param recurrent: whether to use the recurrent (GRU) base
    :param hidden_size: feature vector width
    """
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    # Orthogonal weights, zero biases, ReLU gain for the trunk.
    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
        relu_init(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        relu_init(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        relu_init(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

    # Critic head: no gain argument.
    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()  # put the module in training mode
def __init__(self, in_features, out_features, bias=True, init_type=3,
             can_split=True, actv_fn='relu', has_bn=False):
    """Splittable linear layer with selectable orthogonal-init gain."""
    super().__init__(can_split=can_split, actv_fn=actv_fn, has_bn=has_bn)
    self.has_bias = bias
    if has_bn:
        # BatchNorm supplies its own affine shift, so drop the linear bias.
        self.bn = nn.BatchNorm1d(out_features)
        self.has_bias = False

    zero_bias = lambda x: nn.init.constant_(x, 0)
    small_gain = lambda m: init(m, nn.init.orthogonal_, zero_bias, gain=0.01)
    default_gain = lambda m: init(m, nn.init.orthogonal_, zero_bias)
    sqrt2_gain = lambda m: init(m, nn.init.orthogonal_, zero_bias, np.sqrt(2))

    self.module = nn.Linear(in_features, out_features, self.has_bias)
    # init_type selects the gain: 1 -> 0.01, 2 -> default, 3 -> sqrt(2).
    {1: small_gain, 2: default_gain, 3: sqrt2_gain}[init_type](self.module)
def __init__(self, num_inputs, recurrent=True, hidden_size=512):
    """Four-stage ELU conv trunk (stride-2, padded) plus a scalar critic head.

    Note: recurrent defaults to True here, unlike sibling bases.
    """
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        relu_init(nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)), nn.ELU(),
        relu_init(nn.Conv2d(32, 32, 3, stride=2, padding=1)), nn.ELU(),
        relu_init(nn.Conv2d(32, 32, 3, stride=2, padding=1)), nn.ELU(),
        relu_init(nn.Conv2d(32, 32, 3, stride=2, padding=1)), nn.ELU(),
        Flatten(),
        relu_init(nn.Linear(32 * 6 * 6, hidden_size)), nn.ReLU())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, occ_num_inputs, sign_num_inputs, recurrent):
    """1-D conv over occupancy lanes concatenated with sign features.

    hidden_size is the largest power of two not exceeding the combined
    feature width.
    """
    combined_size = occ_num_inputs[0] * 16 * 5 + sign_num_inputs
    hidden_size = int(np.power(2, np.floor(np.log2(combined_size))))
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.lane = nn.Sequential(
        relu_init(nn.Conv1d(1, 8, 6, stride=1)), nn.ReLU(),
        nn.MaxPool1d(4),
        relu_init(nn.Conv1d(8, 16, 6, stride=1)), nn.ReLU(),
        nn.MaxPool1d(5))

    self.actor = nn.Sequential(
        relu_init(nn.Linear(combined_size, hidden_size)), nn.ReLU(),
        relu_init(nn.Linear(hidden_size, hidden_size)), nn.ReLU())
    self.critic = nn.Sequential(
        relu_init(nn.Linear(combined_size, hidden_size)), nn.ReLU(),
        relu_init(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, obs_shape, recurrent=False, hidden_size=512):
    """Deeper five-conv trunk; input channel count taken from obs_shape[0]."""
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)
    num_inputs = obs_shape[0]

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        # spatial sizes for an 84x84 (or 128) input
        relu_init(nn.Conv2d(num_inputs, 32, 3, stride=2)), nn.ReLU(),  # 63x63
        relu_init(nn.Conv2d(32, 48, 3, stride=2)), nn.ReLU(),          # 31x31
        relu_init(nn.Conv2d(48, 64, 3, stride=2)), nn.ReLU(),          # 15x15
        relu_init(nn.Conv2d(64, 128, 3, stride=2)), nn.ReLU(),         # 7x7
        relu_init(nn.Conv2d(128, 64, 3, stride=1)), nn.ReLU(),         # 5x5
        Flatten(),
        relu_init(nn.Linear(64 * 5 * 5, hidden_size)), nn.ReLU())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, vector_obs_len=0, recurrent=False,
             hidden_size=512):
    """Atari trunk whose recurrent input is widened by an appended vector
    observation; the critic input width depends on whether the GRU
    already consumed that vector."""
    super(CNNBase, self).__init__(
        recurrent, hidden_size + vector_obs_len, hidden_size)

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
        relu_init(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        relu_init(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        relu_init(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    if recurrent:
        # GRU output is hidden_size wide regardless of the vector obs.
        self.critic_linear = plain_init(nn.Linear(hidden_size, 1))
    else:
        self.critic_linear = plain_init(
            nn.Linear(hidden_size + vector_obs_len, 1))

    self.train()
def __init__(self, num_inputs, input_size, action_space, hidden_size=512,
             embed_size=0, recurrent=False, device='cpu'):
    """Conv discriminator base: CNN trunk over (h, w) frames, followed by a
    tanh MLP scoring (features, one-hot action, optional embedding).

    Fix over the original: the trailing `device = torch.device(...)` was a
    dead local assignment (its result was discarded and `self.device` kept
    the constructor argument) - removed. If overriding `self.device` with
    CUDA auto-detection was intended, that is a separate behavior change.
    """
    super(CNNBase, self).__init__(recurrent, num_inputs, hidden_size,
                                  embed_size)
    self.device = device
    self.action_space = action_space

    # Track the spatial size through each conv so the flatten width is exact.
    h, w = input_size
    self.conv1 = nn.Conv2d(num_inputs, 32, kernel_size=8, stride=4)
    w_out = conv2d_size_out(w, kernel_size=8, stride=4)
    h_out = conv2d_size_out(h, kernel_size=8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
    w_out = conv2d_size_out(w_out, kernel_size=4, stride=2)
    h_out = conv2d_size_out(h_out, kernel_size=4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, kernel_size=3, stride=1)
    w_out = conv2d_size_out(w_out, kernel_size=3, stride=1)
    h_out = conv2d_size_out(h_out, kernel_size=3, stride=1)

    init_cnn_ = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))
    self.cnn_trunk = nn.Sequential(
        init_cnn_(self.conv1), nn.ReLU(),
        init_cnn_(self.conv2), nn.ReLU(),
        init_cnn_(self.conv3), nn.ReLU(),
        Flatten(),
        init_cnn_(nn.Linear(32 * h_out * w_out, hidden_size)), nn.ReLU())

    init__ = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0), np.sqrt(2))
    self.trunk = nn.Sequential(
        init__(nn.Linear(hidden_size + self.action_space.n + embed_size,
                         hidden_size // 2)), nn.Tanh(),
        init__(nn.Linear(hidden_size // 2, hidden_size // 2)), nn.Tanh(),
        init__(nn.Linear(hidden_size // 2, 1)))

    # RMSprop to be consistent with the WGAN optimizer, although not necessary.
    # self.optimizer = torch.optim.Adam(self.parameters(), lr=3e-5)
    self.optimizer = torch.optim.RMSprop(self.parameters(), lr=5e-5)

    self.returns = None
    self.ret_rms = RunningMeanStd(shape=())
def __init__(self, num_inputs, num_outputs, zll=False):
    """Beta distribution head: linear layers producing alpha and beta.

    :param zll: zero-last-layer - initialize both heads to all zeros
                instead of orthogonal weights.
    """
    super(Beta, self).__init__()

    ortho_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    zeros_init = lambda m: init(
        m,
        lambda x, **kwargs: nn.init.constant_(x, 0),
        lambda x: nn.init.constant_(x, 0))
    head_init = zeros_init if zll else ortho_init

    self.alpha_linear = head_init(nn.Linear(num_inputs, num_outputs))
    self.beta_linear = head_init(nn.Linear(num_inputs, num_outputs))
def __init__(self, num_inputs, num_outputs, zll=False):
    """Diagonal Gaussian head: linear mean plus a learned state-independent
    log-std (via AddBias).

    :param zll: zero-last-layer - initialize the mean layer to all zeros.
    """
    super(DiagGaussian, self).__init__()

    ortho_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    zeros_init = lambda m: init(
        m,
        lambda x, **kwargs: nn.init.constant_(x, 0),
        lambda x: nn.init.constant_(x, 0))
    head_init = zeros_init if zll else ortho_init

    self.fc_mean = head_init(nn.Linear(num_inputs, num_outputs))
    self.logstd = AddBias(torch.zeros(num_outputs))
def __init__(self, num_inputs, recurrent=False, hidden_size=512,
             fc_size=0, deep=False, conv=None):
    """Atari trunk with optional injected conv module, extra FC inputs,
    and an optional deeper FC head.

    :param num_inputs: number of input channels
    :param fc_size: extra features concatenated before the FC head
    :param deep: use a two-layer FC head instead of one
    :param conv: pre-built conv module to use instead of the default
    """
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain("relu"))

    if conv:
        self.main = conv
    else:
        self.main = nn.Sequential(
            relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
            relu_init(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
            relu_init(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
            Flatten())

    if deep:
        self.fc = nn.Sequential(
            relu_init(nn.Linear(32 * 7 * 7 + fc_size, hidden_size)), nn.ReLU(),
            relu_init(nn.Linear(hidden_size, hidden_size)), nn.ReLU())
    else:
        self.fc = nn.Sequential(
            relu_init(nn.Linear(32 * 7 * 7 + fc_size, hidden_size)), nn.ReLU())

    # None signals "no extra features" downstream.
    self.fc_size = fc_size if fc_size else None

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, input_size, action_space, hidden_size=512,
             recurrent=False, device='cpu'):
    """Conv discriminator base: CNN trunk over (h, w) frames followed by a
    tanh MLP scoring (features, one-hot action)."""
    super(CNNBase, self).__init__(recurrent, num_inputs, hidden_size)
    self.device = device
    self.action_space = action_space

    # Track the spatial size through each conv so the flatten width is exact.
    h, w = input_size
    self.conv1 = nn.Conv2d(num_inputs, 32, kernel_size=8, stride=4)
    w_out = conv2d_size_out(w, kernel_size=8, stride=4)
    h_out = conv2d_size_out(h, kernel_size=8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
    w_out = conv2d_size_out(w_out, kernel_size=4, stride=2)
    h_out = conv2d_size_out(h_out, kernel_size=4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, kernel_size=3, stride=1)
    w_out = conv2d_size_out(w_out, kernel_size=3, stride=1)
    h_out = conv2d_size_out(h_out, kernel_size=3, stride=1)

    relu_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0),
        nn.init.calculate_gain('relu'))
    self.cnn_trunk = nn.Sequential(
        relu_init(self.conv1), nn.ReLU(),
        relu_init(self.conv2), nn.ReLU(),
        relu_init(self.conv3), nn.ReLU(),
        Flatten(),
        relu_init(nn.Linear(32 * h_out * w_out, hidden_size)), nn.ReLU())

    sqrt2_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0), np.sqrt(2))
    self.trunk = nn.Sequential(
        sqrt2_init(nn.Linear(hidden_size + self.action_space.n,
                             hidden_size // 2)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size // 2, hidden_size // 2)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size // 2, 1)))

    self.optimizer = torch.optim.Adam(self.parameters())
    self.returns = None
    self.ret_rms = RunningMeanStd(shape=())
def __init__(self, obs_shape, recurrent=False, hidden_size=64):
    """Three-layer tanh MLP actor and critic over flat observations."""
    num_inputs = obs_shape[0]
    super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

    if recurrent:
        # The GRU output, not the raw observation, feeds the MLPs.
        num_inputs = hidden_size

    sqrt2_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0), np.sqrt(2))

    self.actor = nn.Sequential(
        sqrt2_init(nn.Linear(num_inputs, hidden_size)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size, hidden_size)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size, hidden_size)), nn.Tanh())
    self.critic = nn.Sequential(
        sqrt2_init(nn.Linear(num_inputs, hidden_size)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size, hidden_size)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size, hidden_size)), nn.Tanh())
    self.critic_linear = sqrt2_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, num_outputs):
    """Bernoulli distribution head: one orthogonally-initialized linear layer."""
    super(Bernoulli, self).__init__()

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.linear = plain_init(nn.Linear(num_inputs, num_outputs))
def __init__(self, obs_space, obs_process, obs_module, action_space,
             base_kwargs=None):
    """Actor-critic policy: NNBase2 feature trunk, a scalar critic head,
    and an action distribution matched to the action-space type."""
    super(Policy, self).__init__()
    self.obs_space = obs_space
    self.obs_process = obs_process
    self.obs_module = obs_module
    if base_kwargs is None:
        base_kwargs = {}

    # The base consumes all observations and produces one feature vector.
    self.base = NNBase2(obs_space, obs_process, obs_module, **base_kwargs)

    plain_init = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = plain_init(nn.Linear(self.base.output_size, 1))

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(self.base.output_size, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(self.base.output_size, action_space.shape[0])
    elif space_kind == "MultiBinary":
        self.dist = Bernoulli(self.base.output_size, action_space.shape[0])
    else:
        raise NotImplementedError
def __init__(self, num_inputs, recurrent=False, hidden_size=64):
    """Tanh MLP base with a learnable per-input attention vector
    (`input_attention`, one weight per raw observation dimension)."""
    super(MLPHardAttnBase, self).__init__(recurrent, num_inputs, hidden_size)

    # Attention spans the raw observation even when the MLPs read GRU output.
    num_obs_input = num_inputs
    if recurrent:
        num_inputs = hidden_size

    sqrt2_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0), np.sqrt(2))

    self.input_attention = nn.Parameter(
        torch.zeros(num_obs_input), requires_grad=True)

    self.actor = nn.Sequential(
        sqrt2_init(nn.Linear(num_inputs, hidden_size)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size, hidden_size)), nn.Tanh())
    self.critic = nn.Sequential(
        sqrt2_init(nn.Linear(num_inputs, hidden_size)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size, hidden_size)), nn.Tanh())
    self.critic_linear = sqrt2_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, recurrent, recurrent_input_size, hidden_size, attention=0):
    """Shared backbone: optional GRU plus optional multi-head attention
    modules whose outputs are merged by `em` back to the recurrent width."""
    super(NNBase, self).__init__()

    sqrt2_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0), np.sqrt(2))

    self._hidden_size = hidden_size
    self._recurrent = recurrent
    self._attention = attention

    if recurrent:
        self.gru = nn.GRU(recurrent_input_size, hidden_size)
        # Orthogonal weights / zero biases for every GRU parameter tensor.
        for name, param in self.gru.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0)
            elif 'weight' in name:
                nn.init.orthogonal_(param)

    if attention:
        # MHAT2 arguments per the original comment: q, k, hidden, out, heads.
        half = hidden_size // 2
        self.mhat_b = MHAT2(28, 15, half, half, attention)
        self.mhat_f = MHAT2(28, 7, half, half, attention)
        self.mhat_v = MHAT2(28, 5, half, half, attention)
        self.mhat_o = MHAT2(28, 15, half, half, attention)
        self.mhat_p = MHAT2(28, 15, half, half, attention)
        self.em = nn.Sequential(
            sqrt2_init(nn.Linear(28 + half * 5, recurrent_input_size)),
            nn.ReLU())
def __init__(self, num_inputs, input_size, action_space, hidden_size=64,
             recurrent=False, device='cpu'):
    """MLP discriminator base scoring (state, action) pairs with a tanh trunk."""
    super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)
    self.device = device

    if recurrent:
        num_inputs = hidden_size

    sqrt2_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0), np.sqrt(2))

    self.trunk = nn.Sequential(
        sqrt2_init(nn.Linear(num_inputs + action_space.shape[0],
                             hidden_size)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size, hidden_size)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size, 1)))

    self.optimizer = torch.optim.Adam(self.parameters())
    self.returns = None
    self.ret_rms = RunningMeanStd(shape=())
    self.train()
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Actor-critic policy around an externally constructed base network.

    Fix over the original: the critic head hard-coded `nn.Linear(512, 1)`;
    it now uses `self.base.output_size` (consistent with the sibling Policy
    class), which is identical when the base's output is 512 and correct
    otherwise.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}

    # The base is supplied pre-built rather than constructed from obs_shape.
    self.base = base

    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif action_space.__class__.__name__ == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError

    init_ = lambda m: init(
        m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
    self.critic_linear = init_(nn.Linear(self.base.output_size, 1))
def __init__(self, num_inputs, recurrent=False, hidden_size=64,
             est_beta_value=False):
    """Tanh MLP actor/critic with an optional sigmoid beta-value head."""
    super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)
    self.est_beta_value = est_beta_value

    if recurrent:
        num_inputs = hidden_size

    sqrt2_init = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0), np.sqrt(2))

    self.actor = nn.Sequential(
        sqrt2_init(nn.Linear(num_inputs, hidden_size)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size, hidden_size)), nn.Tanh())
    self.critic = nn.Sequential(
        sqrt2_init(nn.Linear(num_inputs, hidden_size)), nn.Tanh(),
        sqrt2_init(nn.Linear(hidden_size, hidden_size)), nn.Tanh())
    self.critic_linear = sqrt2_init(nn.Linear(hidden_size, 1))
    # Sigmoid: beta value is a probability-like scalar.
    self.beta_value_net = nn.Sequential(
        sqrt2_init(nn.Linear(hidden_size, 1)), nn.Sigmoid())

    self.train()
def __init__(self, num_inputs, input_size, action_space, hidden_size=64,
             embed_size=0, recurrent=False, device='cpu'):
    """MLP discriminator base scoring (state, action, optional embedding).

    Fix over the original: the `device = torch.device(...)` line was a dead
    local assignment (its result was discarded and `self.device` kept the
    constructor argument) - removed. If CUDA auto-detection was intended,
    that is a separate behavior change.
    """
    super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size,
                                  embed_size)
    self.device = device

    if recurrent:
        num_inputs = hidden_size

    init__ = lambda m: init(
        m, nn.init.orthogonal_,
        lambda x: nn.init.constant_(x, 0), np.sqrt(2))

    self.trunk = nn.Sequential(
        init__(nn.Linear(num_inputs + action_space.shape[0] + embed_size,
                         hidden_size)), nn.Tanh(),
        init__(nn.Linear(hidden_size, hidden_size)), nn.Tanh(),
        init__(nn.Linear(hidden_size, 1)))

    # RMSprop (not Adam) to match the WGAN-style training setup.
    # self.optimizer = torch.optim.Adam(self.parameters(), lr=3e-5)
    self.optimizer = torch.optim.RMSprop(self.parameters(), lr=5e-5)

    self.returns = None
    self.ret_rms = RunningMeanStd(shape=())
    self.train()