def __init__(self, num_inputs, num_outputs):
    """Create the single linear layer held by this module.

    Args:
        num_inputs: Input feature count passed to ``nn.Linear``.
        num_outputs: Output feature count passed to ``nn.Linear``.
    """
    super(Bernoulli, self).__init__()

    def _zero_bias(bias):
        # Bias initializer handed to ``init``: fill with the constant 0.
        return nn.init.constant_(bias, 0)

    # ``init`` is a helper defined outside this block. It is given the
    # layer, an orthogonal weight initializer and the bias initializer;
    # whatever it returns is stored as the layer (presumably the same
    # module, initialized in place -- confirm against the helper).
    layer = nn.Linear(num_inputs, num_outputs)
    self.linear = init(layer, nn.init.orthogonal_, _zero_bias)
def __init__(self, num_inputs, num_outputs):
    """Create the mean layer and the log-std bias holder.

    Args:
        num_inputs: Input feature count of the mean layer.
        num_outputs: Output feature count of the mean layer; also the
            length of the zero tensor given to ``AddBias``.
    """
    super(DiagGaussian, self).__init__()

    def _zero_bias(bias):
        # Bias initializer handed to ``init``: fill with the constant 0.
        return nn.init.constant_(bias, 0)

    # ``init`` (defined elsewhere) gets the layer, an orthogonal weight
    # initializer and the bias initializer; its return value is kept as
    # the layer.
    mean_layer = nn.Linear(num_inputs, num_outputs)
    self.fc_mean = init(mean_layer, nn.init.orthogonal_, _zero_bias)

    # ``AddBias`` is defined outside this block; it is constructed from
    # a zero vector with one entry per output.
    initial_logstd = torch.zeros(num_outputs)
    self.logstd = AddBias(initial_logstd)
def __init__(self, num_inputs, recurrent=False, hidden_size=512):
    """Build the convolutional trunk and the scalar critic layer.

    Args:
        num_inputs: Number of input channels of the first convolution.
        recurrent: Forwarded unchanged to the parent constructor.
        hidden_size: Width of the trunk's final linear layer and input
            width of ``critic_linear``; also forwarded twice to the
            parent constructor.
    """
    super(CNNBase, self).__init__(recurrent, hidden_size, hidden_size)

    def _zero_bias(bias):
        # Bias initializer handed to ``init``: fill with the constant 0.
        return nn.init.constant_(bias, 0)

    def _relu_init(module):
        # Orthogonal weights with the gain recommended for ReLU, passed
        # as the fourth positional argument of the external ``init``.
        return init(module, nn.init.orthogonal_, _zero_bias,
                    nn.init.calculate_gain('relu'))

    def _plain_init(module):
        # Orthogonal weights, leaving ``init``'s gain at its default.
        return init(module, nn.init.orthogonal_, _zero_bias)

    # Layers are created (and initialized) in the order they are listed.
    # NOTE(review): the 32 * 7 * 7 flatten size presumably matches a
    # specific input resolution -- confirm against the caller.
    trunk_layers = [
        _relu_init(nn.Conv2d(num_inputs, 32, 8, stride=4)),
        nn.ReLU(),
        _relu_init(nn.Conv2d(32, 64, 4, stride=2)),
        nn.ReLU(),
        _relu_init(nn.Conv2d(64, 32, 3, stride=1)),
        nn.ReLU(),
        Flatten(),
        _relu_init(nn.Linear(32 * 7 * 7, hidden_size)),
        nn.ReLU(),
    ]
    self.main = nn.Sequential(*trunk_layers)

    self.critic_linear = _plain_init(nn.Linear(hidden_size, 1))

    self.train()
def __init__(self, num_inputs, num_outputs):
    """Create the single linear layer held by this module.

    Args:
        num_inputs: Input feature count passed to ``nn.Linear``.
        num_outputs: Output feature count passed to ``nn.Linear``.
    """
    super(Categorical, self).__init__()

    def _zero_bias(bias):
        # Bias initializer handed to ``init``: fill with the constant 0.
        return nn.init.constant_(bias, 0)

    # ``init`` is defined outside this block. Here it is called with an
    # explicit ``gain=0.01`` in addition to the orthogonal weight
    # initializer and the zero-bias initializer.
    layer = nn.Linear(num_inputs, num_outputs)
    self.linear = init(
        layer,
        nn.init.orthogonal_,
        _zero_bias,
        gain=0.01)
def __init__(self, obs_shape, action_space):
    """Build ``self.main`` for one of two supported input configurations.

    * ``Discrete`` action space with a 3-element ``obs_shape``: a
      convolutional stack whose last linear layer has ``action_space.n``
      outputs.
    * ``Box`` action space with a 1-element ``obs_shape``: a fully
      connected stack over ``obs_shape[0] + action_space.shape[0]``
      input features with a single output.

    Args:
        obs_shape: Observation shape; only its length and first entry
            are read here.
        action_space: Action space object, dispatched on the name of
            its class (``"Discrete"`` or ``"Box"``).

    Raises:
        NotImplementedError: For any other combination of action space
            class and observation rank (e.g. ``MultiBinary``).
    """
    super(Discriminator, self).__init__()

    def _zero_bias(bias):
        # Bias initializer handed to ``init``: fill with the constant 0.
        return nn.init.constant_(bias, 0)

    def _relu_init(module):
        # Orthogonal weights with the ReLU gain, via the external
        # ``init`` helper (gain passed as fourth positional argument).
        return init(module, nn.init.orthogonal_, _zero_bias,
                    nn.init.calculate_gain('relu'))

    def _plain_init(module):
        # Orthogonal weights, leaving ``init``'s gain at its default.
        return init(module, nn.init.orthogonal_, _zero_bias)

    # Dispatch on the class *name* so no space classes need importing.
    space_kind = action_space.__class__.__name__

    if space_kind == "Discrete" and len(obs_shape) == 3:
        # Image observations with discrete actions (Atari-style).
        action_dim = action_space.n
        state_dim = obs_shape[0]
        self.main = nn.Sequential(
            _relu_init(nn.Conv2d(state_dim, 32, 8, stride=4)),
            nn.ReLU(),
            _relu_init(nn.Conv2d(32, 64, 4, stride=2)),
            nn.ReLU(),
            _relu_init(nn.Conv2d(64, 32, 3, stride=1)),
            nn.ReLU(),
            Flatten(),
            _relu_init(nn.Linear(32 * 7 * 7, 512)),
            nn.ReLU(),
            _plain_init(nn.Linear(512, action_dim)))
    elif space_kind == "Box" and len(obs_shape) == 1:
        # Flat observations with continuous actions (MuJoCo-style).
        action_dim = action_space.shape[0]
        state_dim = obs_shape[0]
        # Every layer in this branch, including the last, uses the
        # ReLU gain.
        self.main = nn.Sequential(
            _relu_init(nn.Linear(state_dim + action_dim, 64)),
            nn.ReLU(),
            _relu_init(nn.Linear(64, 64)),
            nn.ReLU(),
            _relu_init(nn.Linear(64, 1)))
    else:
        # Same exception type as before, now saying what was rejected.
        raise NotImplementedError(
            "Discriminator supports only Discrete action spaces with "
            "3-d observations or Box action spaces with 1-d "
            "observations; got action space {} and obs_shape "
            "{!r}".format(space_kind, obs_shape))

    self.train()
def __init__(self, obs_shape, action_space):
    """Build ``self.main``, a Tanh MLP over state and action features.

    The network takes ``obs_shape[0] + action_space.shape[0]`` input
    features, has two hidden layers of 64 units and a single output.

    Args:
        obs_shape: Observation shape; must have exactly one entry.
        action_space: Action space object whose class is named
            ``"Box"``; ``action_space.shape[0]`` is read.

    Raises:
        NotImplementedError: If the action space class is not named
            ``"Box"`` or ``obs_shape`` does not have length 1.
    """
    super(Critic, self).__init__()

    # Dispatch on the class *name* so no space classes need importing.
    space_kind = action_space.__class__.__name__
    if not (space_kind == "Box" and len(obs_shape) == 1):
        # Same exception type as before, now saying what was rejected.
        raise NotImplementedError(
            "Critic supports only Box action spaces with 1-d "
            "observations; got action space {} and obs_shape "
            "{!r}".format(space_kind, obs_shape))

    action_dim = action_space.shape[0]
    state_dim = obs_shape[0]

    # Gain passed to the external ``init`` helper as its fourth
    # positional argument.
    gain = np.sqrt(2)

    def _zero_bias(bias):
        # Bias initializer handed to ``init``: fill with the constant 0.
        return nn.init.constant_(bias, 0)

    def _init_layer(module):
        return init(module, nn.init.orthogonal_, _zero_bias, gain)

    self.main = nn.Sequential(
        _init_layer(nn.Linear(state_dim + action_dim, 64)),
        nn.Tanh(),
        _init_layer(nn.Linear(64, 64)),
        nn.Tanh(),
        _init_layer(nn.Linear(64, 1)))

    self.train()
def __init__(self, obs_shape, action_space):
    """Build the Tanh feature trunk and the Gaussian action head.

    ``self.base`` maps ``obs_shape[0]`` features through two 64-unit
    Tanh layers; ``self.dist`` is a ``DiagGaussian(64, n)`` where ``n``
    is ``action_space.shape[0]``.

    Args:
        obs_shape: Observation shape; must have exactly one entry.
        action_space: Action space object whose class is named
            ``"Box"``; ``action_space.shape[0]`` is read.

    Raises:
        NotImplementedError: If the action space class is not named
            ``"Box"`` or ``obs_shape`` does not have length 1.
    """
    super(Actor, self).__init__()

    # Dispatch on the class *name* so no space classes need importing.
    space_kind = action_space.__class__.__name__
    if not (space_kind == "Box" and len(obs_shape) == 1):
        # Same exception type as before, now saying what was rejected.
        raise NotImplementedError(
            "Actor supports only Box action spaces with 1-d "
            "observations; got action space {} and obs_shape "
            "{!r}".format(space_kind, obs_shape))

    state_dim = obs_shape[0]
    # The original also bound an unused ``action_dim`` holding this same
    # value; only the name that is actually consumed is kept.
    num_outputs = action_space.shape[0]

    # Gain passed to the external ``init`` helper as its fourth
    # positional argument.
    gain = np.sqrt(2)

    def _zero_bias(bias):
        # Bias initializer handed to ``init``: fill with the constant 0.
        return nn.init.constant_(bias, 0)

    def _init_layer(module):
        return init(module, nn.init.orthogonal_, _zero_bias, gain)

    self.base = nn.Sequential(
        _init_layer(nn.Linear(state_dim, 64)),
        nn.Tanh(),
        _init_layer(nn.Linear(64, 64)),
        nn.Tanh())

    # ``DiagGaussian`` is defined outside this block.
    self.dist = DiagGaussian(64, num_outputs)
def __init__(self, num_inputs, recurrent=False, hidden_size=64):
    """Build separate actor and critic Tanh trunks plus a value layer.

    Args:
        num_inputs: Input feature count; forwarded to the parent
            constructor and, when ``recurrent`` is falsy, used as the
            input width of both trunks.
        recurrent: Forwarded to the parent constructor. When truthy the
            trunks take ``hidden_size`` inputs instead of
            ``num_inputs`` (presumably the width produced by the
            parent's recurrent layer -- confirm against the base
            class).
        hidden_size: Width of every hidden layer and input width of
            ``critic_linear``.
    """
    super(MLPBase, self).__init__(recurrent, num_inputs, hidden_size)

    # Input width of the first layer of each trunk.
    in_features = hidden_size if recurrent else num_inputs

    # Gain passed to the external ``init`` helper as its fourth
    # positional argument.
    gain = np.sqrt(2)

    def _zero_bias(bias):
        # Bias initializer handed to ``init``: fill with the constant 0.
        return nn.init.constant_(bias, 0)

    def _init_layer(module):
        return init(module, nn.init.orthogonal_, _zero_bias, gain)

    def _tanh_trunk():
        # Two initialized linear layers, each followed by Tanh.
        return nn.Sequential(
            _init_layer(nn.Linear(in_features, hidden_size)),
            nn.Tanh(),
            _init_layer(nn.Linear(hidden_size, hidden_size)),
            nn.Tanh())

    # Construction order (actor, critic, value layer) is kept as in the
    # original so layers are created and initialized in the same
    # sequence.
    self.actor = _tanh_trunk()
    self.critic = _tanh_trunk()
    self.critic_linear = _init_layer(nn.Linear(hidden_size, 1))

    self.train()