def __init__(self, num_inputs, use_gru):
    """Build the convolutional trunk, the optional GRU cell and the value head.

    Args:
        num_inputs: Number of input channels fed to the first convolution.
        use_gru: When truthy, a 512-unit ``nn.GRUCell`` is created as
            ``self.gru`` with orthogonal weights and zeroed biases.
    """
    super(CNNBase, self).__init__()

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    relu_gain = nn.init.calculate_gain('relu')

    def relu_init(module):
        # `init` is a helper defined elsewhere; its return value is used
        # directly as the layer, so it presumably returns the module.
        return init(module, nn.init.orthogonal_, zero_bias, relu_gain)

    # Layers are created strictly in network order so that random draws
    # made during construction/initialisation happen in a fixed sequence.
    conv1 = relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4))
    conv2 = relu_init(nn.Conv2d(32, 64, 4, stride=2))
    conv3 = relu_init(nn.Conv2d(64, 32, 3, stride=1))
    flatten = Flatten()
    # assumes the conv stack yields 32 x 7 x 7 features (holds for 84x84
    # inputs) -- TODO confirm the expected input resolution
    fc = relu_init(nn.Linear(32 * 7 * 7, 512))

    self.main = nn.Sequential(
        conv1, nn.ReLU(),
        conv2, nn.ReLU(),
        conv3, nn.ReLU(),
        flatten,
        fc, nn.ReLU(),
    )

    if use_gru:
        self.gru = nn.GRUCell(512, 512)
        for weight in (self.gru.weight_ih, self.gru.weight_hh):
            nn.init.orthogonal_(weight.data)
        for bias in (self.gru.bias_ih, self.gru.bias_hh):
            bias.data.fill_(0)

    def plain_init(module):
        # Value head: same initialisers, but no explicit gain argument.
        return init(module, nn.init.orthogonal_, zero_bias)

    self.critic_linear = plain_init(nn.Linear(512, 1))

    # Presumably nn.Module.train(); the base class is not visible here.
    self.train()
def __init__(self, num_inputs, recurrent=False, hidden_size=64):
    """Build separate actor and critic MLP trunks plus a scalar value head.

    Args:
        num_inputs: Size of the input feature vector.
        recurrent: Forwarded to the base-class constructor; when truthy the
            trunks take ``hidden_size`` inputs instead of ``num_inputs``.
        hidden_size: Width of both hidden layers in each trunk.
    """
    super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

    if recurrent:
        # presumably the recurrent base emits hidden_size features --
        # verify against the base class
        num_inputs = hidden_size

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    def init_(module):
        return init(module, nn.init.orthogonal_, zero_bias, np.sqrt(2))

    widths = [num_inputs, hidden_size, hidden_size]

    def build_trunk():
        # One Linear + ReLU pair per consecutive pair of widths.
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        return nn.Sequential(*stack)

    # NOTE(review): the hidden layers keep nn.Linear's default
    # initialisation; only critic_linear goes through init_. A previous
    # Tanh-based variant wrapped every hidden layer in init_ -- confirm
    # that dropping it here is intentional.
    self.actor = build_trunk()
    self.critic = build_trunk()

    self.critic_linear = init_(nn.Linear(hidden_size, 1))

    # Presumably nn.Module.train(); the base class is not visible here.
    self.train()
def __init__(self, num_inputs, num_outputs):
    """Create the single linear layer stored as ``self.linear``.

    Args:
        num_inputs: Input feature size of the linear layer.
        num_outputs: Output feature size of the linear layer.
    """
    super(Bernoulli, self).__init__()

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    projection = nn.Linear(num_inputs, num_outputs)
    # `init` is a helper defined elsewhere; it receives the layer, the
    # orthogonal weight initialiser and the zero-bias initialiser.
    self.linear = init(projection, nn.init.orthogonal_, zero_bias)
def __init__(self, num_inputs, num_outputs):
    """Create the mean projection and the ``logstd`` bias module.

    Args:
        num_inputs: Input feature size of the mean projection.
        num_outputs: Output size of the mean projection and length of the
            zero tensor handed to ``AddBias``.
    """
    super(DiagGaussian, self).__init__()

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    mean_layer = nn.Linear(num_inputs, num_outputs)
    self.fc_mean = init(mean_layer, nn.init.orthogonal_, zero_bias)

    # AddBias is defined elsewhere; it is seeded with an all-zero vector.
    initial_logstd = torch.zeros(num_outputs)
    self.logstd = AddBias(initial_logstd)
def __init__(self, num_inputs, recurrent=False, hidden_size=512):
    """Build the convolutional trunk and the scalar value head.

    Args:
        num_inputs: Number of input channels fed to the first convolution.
        recurrent: Forwarded unchanged to the base-class constructor.
        hidden_size: Width of the final fully connected layer; also passed
            twice to the base-class constructor.
    """
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    relu_gain = nn.init.calculate_gain('relu')

    def relu_init(module):
        return init(module, nn.init.orthogonal_, zero_bias, relu_gain)

    # Layers are created in network order so that random draws made during
    # construction/initialisation happen in a fixed sequence.
    conv1 = relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4))
    conv2 = relu_init(nn.Conv2d(32, 64, 4, stride=2))
    conv3 = relu_init(nn.Conv2d(64, 32, 3, stride=1))
    flatten = Flatten()
    # assumes the conv stack yields 32 x 7 x 7 features (holds for 84x84
    # inputs) -- TODO confirm the expected input resolution
    fc = relu_init(nn.Linear(32 * 7 * 7, hidden_size))

    self.main = nn.Sequential(
        conv1, nn.ReLU(),
        conv2, nn.ReLU(),
        conv3, nn.ReLU(),
        flatten,
        fc, nn.ReLU(),
    )

    def plain_init(module):
        # Value head: same initialisers, but no explicit gain argument.
        return init(module, nn.init.orthogonal_, zero_bias)

    self.critic_linear = plain_init(nn.Linear(hidden_size, 1))

    # Presumably nn.Module.train(); the base class is not visible here.
    self.train()
def __init__(self, num_inputs, num_outputs):
    """Create the single linear layer stored as ``self.linear``.

    Args:
        num_inputs: Input feature size of the linear layer.
        num_outputs: Output feature size of the linear layer.
    """
    super(Categorical, self).__init__()

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    projection = nn.Linear(num_inputs, num_outputs)
    # The gain is passed by keyword with a value of 0.01, in contrast to
    # the sibling heads that pass no gain at all.
    self.linear = init(
        projection,
        nn.init.orthogonal_,
        zero_bias,
        gain=0.01,
    )
def __init__(self, num_inputs):
    """Build 64-unit Tanh actor and critic trunks plus a scalar value head.

    Args:
        num_inputs: Size of the input feature vector of both trunks.
    """
    super(MLPBase, self).__init__()

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    def normc_init(module):
        # init and init_normc_ are helpers defined elsewhere in the project.
        return init(module, init_normc_, zero_bias)

    def tanh_trunk():
        # Two initialised Linear layers, each followed by Tanh.
        first = normc_init(nn.Linear(num_inputs, 64))
        second = normc_init(nn.Linear(64, 64))
        return nn.Sequential(first, nn.Tanh(), second, nn.Tanh())

    # Actor is built before critic, then the value head.
    self.actor = tanh_trunk()
    self.critic = tanh_trunk()
    self.critic_linear = normc_init(nn.Linear(64, 1))

    # Presumably nn.Module.train(); the base class is not visible here.
    self.train()
def __init__(self, perception, action_space, internal_state_size=128):
    """Wire a perception unit to a GRU cell, a value head and a policy head.

    Args:
        perception: Stored as ``self.perception_unit``; must expose an
            ``output_size`` attribute (presumably an ActorCriticModule --
            verify against callers).
        action_space: Object whose ``n`` attribute gives the number of
            outputs of the categorical policy head.
        internal_state_size: Input and hidden size of the GRU cell, and
            input size of the policy head.
    """
    super().__init__()
    self.perception_unit = perception
    self.gru = nn.GRUCell(internal_state_size, internal_state_size)

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    # Critic: scalar value head.
    # NOTE(review): the critic is sized from perception_unit.output_size
    # while the policy head below is sized from internal_state_size --
    # confirm both widths are intended.
    value_head = nn.Linear(self.perception_unit.output_size, 1)
    self.critic_linear = init(value_head, nn.init.orthogonal_, zero_bias)

    action_count = action_space.n
    self.dist = Categorical(internal_state_size, action_count)

    self.l2 = nn.MSELoss()
    self.l1 = nn.L1Loss()
def __init__(self, perception, action_space, num_stack=4):
    """Wire a perception unit to a value head and a categorical policy head.

    Args:
        perception: Stored as ``self.perception_unit``; must expose an
            ``output_size`` attribute (presumably an ActorCriticModule --
            verify against callers).
        action_space: Action space whose class is named ``Discrete``; its
            ``n`` attribute sets the number of policy outputs.
        num_stack: Accepted but not used inside this constructor.

    Raises:
        NotImplementedError: If the class name of ``action_space`` is not
            ``"Discrete"``.
    """
    super().__init__()
    self.perception_unit = perception

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    # Critic: scalar value head on top of the perception features.
    value_head = nn.Linear(self.perception_unit.output_size, 1)
    self.critic_linear = init(value_head, nn.init.orthogonal_, zero_bias)

    # The check is by class name, so no import of the space type is needed
    # here. It stays after the critic is built, as in the original order.
    if action_space.__class__.__name__ != "Discrete":
        raise NotImplementedError

    action_count = action_space.n
    self.dist = Categorical(self.perception_unit.output_size, action_count)

    self.l2 = nn.MSELoss()
    self.l1 = nn.L1Loss()
def init_(m):
    """Pass ``m`` to ``init`` with orthogonal weights, zero bias and sqrt(2).

    Args:
        m: The object forwarded as the first argument of ``init``
            (presumably an ``nn.Module`` layer -- verify against callers).

    Returns:
        Whatever ``init`` returns for these arguments.
    """

    def zero_bias(bias):
        return nn.init.constant_(bias, 0)

    gain = np.sqrt(2)
    return init(m, nn.init.orthogonal_, zero_bias, gain)