# Imports assumed by all excerpts below (inferred from usage):
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from gym import spaces


def __init__(self, num_inputs, vector_obs_len=0, recurrent=False, hidden_size=512):
    super(CNNBase, self).__init__(recurrent, hidden_size + vector_obs_len, hidden_size)

    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0),
                           nn.init.calculate_gain('relu'))

    # Atari-style conv torso (note the 32-channel final conv) followed by
    # a linear projection to hidden_size.
    self.main = nn.Sequential(
        init_(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
        init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        init_(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0))

    # With a recurrent core the GRU consumes the concatenated
    # (conv + vector) features and emits hidden_size; without one, the
    # vector observation is appended directly to the critic head's input.
    if recurrent:
        self.critic_linear = init_(nn.Linear(hidden_size, 1))
    else:
        self.critic_linear = init_(nn.Linear(hidden_size + vector_obs_len, 1))

    self.train()
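# --- Hedged sanity check (not part of the original class) ---
# The hard-coded 32 * 7 * 7 flatten size assumes the usual 84x84 Atari
# frame; with out = (in + 2*pad - kernel) // stride + 1 and pad = 0:
#   84 -> (84 - 8) // 4 + 1 = 20
#   20 -> (20 - 4) // 2 + 1 = 9
#    9 -> ( 9 - 3) // 1 + 1 = 7
# Flatten is not defined in these excerpts; a minimal sketch consistent
# with its use here (collapse everything after the batch dimension):
class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)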
def __init__(self, num_inputs, num_outputs):
    super(Bernoulli, self).__init__()

    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0))

    # One logit per (independent) binary action dimension.
    self.linear = init_(nn.Linear(num_inputs, num_outputs))
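# The init(...) helper used throughout is not shown in these excerpts. A
# minimal sketch of the ikostrikov-style version these calls are
# consistent with (an assumption, not the confirmed original):
def init(module, weight_init, bias_init, gain=1):
    weight_init(module.weight.data, gain=gain)
    bias_init(module.bias.data)
    return module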
def __init__(self, num_inputs, hidden_size, num_layers, recurrent, activation):
    assert num_layers > 0
    super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

    # With a recurrent core, the towers below consume the GRU's hidden
    # state rather than the raw observation.
    if recurrent:
        num_inputs = hidden_size

    init_ = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(x, 0))

    # Build actor and critic towers of num_layers identical blocks.
    self.actor = nn.Sequential()
    self.critic = nn.Sequential()
    for i in range(num_layers):
        self.actor.add_module(
            name=f"fc{i}",
            module=nn.Sequential(
                init_(nn.Linear(num_inputs, hidden_size)), activation),
        )
        self.critic.add_module(
            name=f"fc{i}",
            module=nn.Sequential(
                init_(nn.Linear(num_inputs, hidden_size)), activation),
        )
        num_inputs = hidden_size

    self.critic_linear = init_(nn.Linear(num_inputs, 1))
    self.train()
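# init_normc_ is likewise external. A hedged sketch of the
# normalized-columns initializer it presumably refers to (weights drawn
# from N(0, 1), then rescaled so each output unit's weight vector has
# norm `gain`):
def init_normc_(weight, gain=1):
    weight.normal_(0, 1)
    weight *= gain / torch.sqrt(weight.pow(2).sum(1, keepdim=True))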
def __init__(self, num_inputs, num_outputs):
    super(DiagGaussian, self).__init__()

    init_ = lambda m: init(m, init_normc_,
                           lambda x: nn.init.constant_(x, 0))

    # Mean comes from the features; log-std is a free, state-independent
    # parameter realized as a learnable bias over a zero input.
    self.fc_mean = init_(nn.Linear(num_inputs, num_outputs))
    self.logstd = AddBias(torch.zeros(num_outputs))
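# AddBias is also external. In repos of this style it is a tiny module
# wrapping a learnable bias, which here turns logstd into a free,
# state-independent parameter. A minimal sketch under that assumption:
class AddBias(nn.Module):
    def __init__(self, bias):
        super(AddBias, self).__init__()
        self._bias = nn.Parameter(bias.unsqueeze(1))

    def forward(self, x):
        # x is typically a zeros tensor shaped like the action mean, so
        # the output is just the (broadcast) learned bias.
        return x + self._bias.t().view(1, -1)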
def __init__(self, num_inputs, num_outputs):
    super(Categorical, self).__init__()

    # gain=0.01 keeps the initial logits close to zero, so the initial
    # policy is near-uniform over actions.
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0),
                           gain=0.01)

    self.linear = init_(nn.Linear(num_inputs, num_outputs))
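# Hedged illustration of the gain=0.01 effect (not part of the class):
#   logits = torch.randn(1, 4) * 0.01
#   torch.softmax(logits, dim=-1)   # ~tensor([[0.25, 0.25, 0.25, 0.25]])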
def __init__(self, num_obs_inputs, num_act_inputs, hidden_size=64):
    super(MetaMLP, self).__init__()

    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0),
                           np.sqrt(2))

    # Meta reward head: takes (obs, action) concatenated; the final Tanh
    # bounds the learned reward in [-1, 1], as in the paper.
    self.meta_reward = nn.Sequential(
        init_(nn.Linear(num_obs_inputs + num_act_inputs, hidden_size)),
        nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)),
        nn.ReLU(),
        init_(nn.Linear(hidden_size, 1)),
        nn.Tanh())

    # Meta critic head: value estimate from the observation alone.
    self.meta_critic = nn.Sequential(
        init_(nn.Linear(num_obs_inputs, hidden_size)),
        nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)),
        nn.ReLU(),
        init_(nn.Linear(hidden_size, 1)))

    self.train()
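# Hedged usage sketch (tensor names hypothetical): the reward head sees
# observation and action concatenated; the critic sees the observation
# alone.
#   meta = MetaMLP(num_obs_inputs=8, num_act_inputs=2)
#   r_hat = meta.meta_reward(torch.cat([obs, act], dim=-1))  # in [-1, 1]
#   v_hat = meta.meta_critic(obs)                            # unbounded value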
def __init__(self, num_inputs, recurrent=False, hidden_size=64):
    super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

    if recurrent:
        num_inputs = hidden_size

    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0),
                           np.sqrt(2))

    # Separate two-layer tanh towers for actor and critic.
    self.actor = nn.Sequential(
        init_(nn.Linear(num_inputs, hidden_size)), nn.Tanh(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.Tanh())

    self.critic = nn.Sequential(
        init_(nn.Linear(num_inputs, hidden_size)), nn.Tanh(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.Tanh())

    self.critic_linear = init_(nn.Linear(hidden_size, 1))
    self.train()
def __init__(self, num_inputs, hidden_size=64):
    super(PolicyMLP, self).__init__()
    self.hidden_size = hidden_size

    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0),
                           np.sqrt(2))

    # Separate two-layer ReLU towers for actor and critic.
    self.actor = nn.Sequential(
        init_(nn.Linear(num_inputs, hidden_size)), nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

    self.critic = nn.Sequential(
        init_(nn.Linear(num_inputs, hidden_size)), nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU())

    self.critic_linear = init_(nn.Linear(hidden_size, 1))
    self.train()
def __init__(
    self,
    hidden_size,
    num_layers,
    recurrent,
    obs_space,
    num_conv_layers,
    kernel_size,
    stride,
    activation=nn.ReLU(),
    **_,  # ignore unused config kwargs
):
    if type(obs_space) is spaces.Dict:
        obs_space = Obs(**obs_space.spaces)
    assert num_layers > 0
    H = hidden_size

    super().__init__(
        recurrent=recurrent, recurrent_input_size=H, hidden_size=hidden_size
    )

    # Pre-processed subtask lines, with an all-zero padding row appended.
    self.register_buffer(
        "subtasks",
        torch.tensor(
            [Env.preprocess_line(Subtask(s)) for s in subtasks()] + [[0, 0, 0, 0]]
        ),
    )

    (d, h, w) = obs_space.obs.shape
    inventory_size = obs_space.inventory.n
    line_nvec = torch.tensor(obs_space.lines.nvec)
    # Cumulative offsets so each line component indexes a disjoint slice
    # of the shared embedding table.
    offset = F.pad(line_nvec[0, :-1].cumsum(0), [1, 0])
    self.register_buffer("offset", offset)
    self.obs_spaces = obs_space
    self.obs_sections = get_obs_sections(self.obs_spaces)

    padding = (kernel_size // 2) % stride
    self.conv = nn.Sequential()
    in_size = d
    assert num_conv_layers > 0
    for i in range(num_conv_layers):
        # NOTE: init_ is assumed to be a module-level weight-init helper;
        # it is not defined in this __init__ (only init2 is, below).
        self.conv.add_module(
            name=f"conv{i}",
            module=nn.Sequential(
                init_(
                    nn.Conv2d(
                        in_size,
                        hidden_size,
                        kernel_size=kernel_size,
                        stride=stride,
                        padding=padding,
                    )
                ),
                activation,
            ),
        )
        in_size = hidden_size
        # Standard conv output-size formula; the kernel is then clamped
        # so it never exceeds the shrinking feature map.
        h = w = (h + (2 * padding) - (kernel_size - 1) - 1) // stride + 1
        kernel_size = min(h, kernel_size)
    self.conv.add_module(name="flatten", module=Flatten())

    init2 = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(x, 0))
    self.conv_projection = nn.Sequential(
        init2(nn.Linear(h * w * hidden_size, hidden_size)), activation
    )
    # EmbeddingBag pools (mean, the default mode) the embeddings of each
    # instruction line's components.
    self.line_embed = nn.EmbeddingBag(line_nvec[0].sum(), hidden_size)
    self.inventory_embed = nn.Sequential(
        init2(nn.Linear(inventory_size, hidden_size)), activation
    )

    self.mlp = nn.Sequential()
    in_size = hidden_size if recurrent else H
    for i in range(num_layers):
        self.mlp.add_module(
            name=f"fc{i}",
            module=nn.Sequential(
                init2(nn.Linear(in_size, hidden_size)), activation
            ),
        )
        in_size = hidden_size

    self.critic_linear = init2(nn.Linear(in_size, 1))
    self._output_size = in_size
    self.train()
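# Hedged shape check for the conv stack above (values illustrative, not
# from the original): with kernel_size=3, stride=1, the padding formula
# gives (3 // 2) % 1 = 0, so a 6x6 map shrinks by 2 per layer, 6 -> 4 -> 2,
# and kernel_size = min(h, kernel_size) clamps the kernel once the map
# gets smaller than it.
def conv_out(h, kernel_size, stride, padding):
    # Same formula as the line inside the conv loop above.
    return (h + 2 * padding - (kernel_size - 1) - 1) // stride + 1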