Ejemplo n.º 1
0
 def __init__(self, config, env, device, **kwargs):
     """Create the feature layers and the single-output Q head.

     Args:
         config: Configuration object; kept as ``self.config``.
         env: Environment; its observation and action spaces determine
             the input width passed to ``make_fc``.
         device: Target passed to ``self.to`` once the layers exist.
         **kwargs: Forwarded unchanged to the parent constructor.
     """
     super().__init__(**kwargs)
     self.config, self.env, self.device = config, env, device

     # Input width = flattened observation size + flattened action size.
     observation_dim = flatdim(env.observation_space)
     action_dim = flatdim(env.action_space)
     first_width, last_width = 400, 300
     self.feature_layers = make_fc(observation_dim + action_dim,
                                   [first_width, last_width])
     self.Q_head = nn.Linear(last_width, 1)

     self.to(self.device)
Ejemplo n.º 2
0
 def __init__(self, config, env, device, **kwargs):
     """Create the feature layers plus the mean and log-std linear heads.

     Args:
         config: Configuration object; kept as ``self.config``.
         env: Environment; its observation space sizes the feature layers
             and its action space sizes both heads.
         device: Target passed to ``self.to`` once the layers exist.
         **kwargs: Forwarded unchanged to the parent constructor.
     """
     super().__init__(**kwargs)
     self.config, self.env, self.device = config, env, device

     hidden_width = 256
     self.feature_layers = make_fc(flatdim(env.observation_space),
                                   [hidden_width, hidden_width])
     # Both heads output one value per flattened action dimension.
     action_dim = flatdim(env.action_space)
     self.mean_head = nn.Linear(hidden_width, action_dim)
     self.logstd_head = nn.Linear(hidden_width, action_dim)

     self.to(device)
Ejemplo n.º 3
0
    def __init__(self, config, env, device, **kwargs):
        """Create the feature layers and a ``TanhDiagGaussianHead`` action head.

        Args:
            config: Configuration object; kept as ``self.config``.
            env: Environment; its observation space sizes the feature
                layers and its action space sizes the action head.
            device: Passed to the action head and to ``self.to``.
            **kwargs: Forwarded to the parent constructor and to the
                action head.
        """
        super().__init__(**kwargs)
        self.config, self.env, self.device = config, env, device

        hidden_width = 256
        observation_dim = flatdim(env.observation_space)
        self.feature_layers = make_fc(observation_dim,
                                      [hidden_width, hidden_width])
        action_dim = flatdim(env.action_space)
        self.action_head = TanhDiagGaussianHead(hidden_width, action_dim,
                                                device, **kwargs)

        self.to(device)
Ejemplo n.º 4
0
 def __init__(self, env, capacity, device):
     """Allocate zero-filled float32 arrays holding ``capacity`` rows each.

     Args:
         env: Environment; its flattened observation and action sizes
             give the column counts of the 2-D arrays.
         capacity: Number of rows in every array.
         device: Kept as ``self.device``; not used during construction.
     """
     self.env, self.capacity, self.device = env, capacity, device

     observation_shape = (capacity, flatdim(env.observation_space))
     action_shape = (capacity, flatdim(env.action_space))
     float_type = np.float32

     self.observations = np.zeros(observation_shape, dtype=float_type)
     self.actions = np.zeros(action_shape, dtype=float_type)
     self.rewards = np.zeros(capacity, dtype=float_type)
     self.next_observations = np.zeros(observation_shape, dtype=float_type)
     self.masks = np.zeros(capacity, dtype=float_type)

     # Both counters start at zero.
     self.size = 0
     self.pointer = 0
Ejemplo n.º 5
0
 def __init__(self, config, env, device, **kwargs):
     """Create the feature layers and action head, and record the action bound.

     Args:
         config: Configuration object; kept as ``self.config``.
         env: Environment; its observation space sizes the feature layers
             and its action space sizes the head and supplies the bound.
         device: Target passed to ``self.to`` once the layers exist.
         **kwargs: Forwarded unchanged to the parent constructor.

     Raises:
         AssertionError: If the upper bounds of the action space are not
             all equal, or the lower bound is not the negated upper bound.
     """
     super().__init__(**kwargs)
     self.config, self.env, self.device = config, env, device

     first_width, last_width = 400, 300
     self.feature_layers = make_fc(flatdim(env.observation_space),
                                   [first_width, last_width])
     self.action_head = nn.Linear(last_width, flatdim(env.action_space))

     # Only a single bound shared by every dimension, symmetric around
     # zero, is accepted; that bound is stored as ``max_action``.
     action_space = env.action_space
     assert np.unique(action_space.high).size == 1
     assert -np.unique(action_space.low).item() == np.unique(action_space.high).item()
     self.max_action = action_space.high[0]

     self.to(self.device)
Ejemplo n.º 6
0
 def __init__(self, config, env, device, **kwargs):
     """Create two separate Q branches, each with its own layers and head.

     Args:
         config: Configuration object; kept as ``self.config``.
         env: Environment; its observation and action spaces determine
             the input width of both branches.
         device: Target passed to ``self.to`` once the layers exist.
         **kwargs: Forwarded unchanged to the parent constructor.
     """
     super().__init__(**kwargs)
     self.config, self.env, self.device = config, env, device

     # Both branches take the flattened observation and action together.
     input_dim = flatdim(env.observation_space) + flatdim(env.action_space)
     hidden_width = 256

     # Q1
     self.first_feature_layers = make_fc(input_dim, [hidden_width, hidden_width])
     self.first_Q_head = nn.Linear(hidden_width, 1)

     # Q2
     self.second_feature_layers = make_fc(input_dim, [hidden_width, hidden_width])
     self.second_Q_head = nn.Linear(hidden_width, 1)

     self.to(self.device)
Ejemplo n.º 7
0
    def __init__(self, config, env, device, **kwargs):
        """Build the feature network, action head, value head and optimizer.

        Args:
            config: Mapping read for ``'nn.sizes'``, ``'agent.lr'`` and
                ``'agent.use_lr_scheduler'``; additionally
                ``'agent.std0'``, ``'agent.std_style'``,
                ``'agent.std_range'`` and ``'agent.beta'`` for a ``Box``
                action space, and ``'train.timestep'`` when the
                scheduler is enabled.
            env: Environment whose action space selects the action head.
            device: Passed to the sub-networks and used for the value head.
            **kwargs: Forwarded to the parent constructor, the feature
                network and the action head.

        Raises:
            TypeError: If ``env.action_space`` is neither ``Discrete``
                nor ``Box``.
        """
        super().__init__(config, env, device, **kwargs)

        feature_dim = config['nn.sizes'][-1]
        self.feature_network = MLP(config, env, device, **kwargs)
        if isinstance(env.action_space, Discrete):
            self.action_head = CategoricalHead(feature_dim, env.action_space.n,
                                               device, **kwargs)
        elif isinstance(env.action_space, Box):
            self.action_head = DiagGaussianHead(feature_dim,
                                                flatdim(env.action_space),
                                                device, config['agent.std0'],
                                                config['agent.std_style'],
                                                config['agent.std_range'],
                                                config['agent.beta'], **kwargs)
        else:
            # Fail here instead of leaving ``self.action_head`` undefined,
            # which would only surface later as an AttributeError.
            raise TypeError(
                'Unsupported action space {}; expected Discrete or Box'.format(
                    type(env.action_space).__name__))
        self.V_head = nn.Linear(feature_dim, 1).to(device)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)

        self.total_timestep = 0

        # The optimizer is created last so that ``self.parameters()``
        # already includes every sub-module registered above.
        self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
        if config['agent.use_lr_scheduler']:
            self.lr_scheduler = linear_lr_scheduler(self.optimizer,
                                                    config['train.timestep'],
                                                    min_lr=1e-8)
Ejemplo n.º 8
0
    def __init__(self, config, env, device, **kwargs):
        """Build the actor, the critic and its target copy, and their optimizers.

        Args:
            config: Mapping read for ``'agent.actor.lr'``,
                ``'agent.critic.lr'`` and ``'agent.alpha0'``.
            env: Environment; its action space sets ``target_entropy``.
            device: Passed to the sub-networks and used for ``log_alpha``.
            **kwargs: Forwarded to the parent constructor and to the
                actor and critic constructors.
        """
        super().__init__(config, env, device, **kwargs)

        self.actor = Actor(config, env, device, **kwargs)
        actor_parameters = self.actor.parameters()
        self.actor_optimizer = optim.Adam(actor_parameters,
                                          lr=config['agent.actor.lr'])

        self.critic = Critic(config, env, device, **kwargs)
        self.critic_target = Critic(config, env, device, **kwargs)
        # The target critic starts from the critic's weights, in eval mode.
        critic_weights = self.critic.state_dict()
        self.critic_target.load_state_dict(critic_weights)
        self.critic_target.eval()
        critic_parameters = self.critic.parameters()
        self.critic_optimizer = optim.Adam(critic_parameters,
                                           lr=config['agent.critic.lr'])

        # Target entropy is the negated flattened action dimension.
        action_dim = flatdim(env.action_space)
        self.target_entropy = -float(action_dim)
        initial_log_alpha = torch.tensor(np.log(config['agent.alpha0']))
        self.log_alpha = nn.Parameter(initial_log_alpha.to(device))
        self.log_alpha_optimizer = optim.Adam([self.log_alpha], lr=1e-3)

        # The optimizers are looked up on ``self`` at call time.
        self.optimizer_zero_grad = lambda: [
            optimizer.zero_grad()
            for optimizer in (self.actor_optimizer,
                              self.critic_optimizer,
                              self.log_alpha_optimizer)
        ]
Ejemplo n.º 9
0
 def __init__(self, config, env, device, **kwargs):
     """Create the feature layers and one ``LayerNorm`` per hidden size.

     Args:
         config: Mapping read for ``'nn.sizes'``; kept as ``self.config``.
         env: Environment; its observation space sizes the first layer.
         device: Target passed to ``self.to`` once the layers exist.
         **kwargs: Forwarded unchanged to the parent constructor.
     """
     super().__init__(**kwargs)
     self.config, self.env, self.device = config, env, device

     observation_dim = flatdim(env.observation_space)
     hidden_sizes = config['nn.sizes']
     self.feature_layers = make_fc(observation_dim, hidden_sizes)
     self.layer_norms = nn.ModuleList([nn.LayerNorm(width) for width in hidden_sizes])
     self.to(self.device)
Ejemplo n.º 10
0
 def __init__(self, config, env, device, **kwargs):
     """Build the feature network and the action head for the action space.

     Args:
         config: Mapping read for ``'nn.sizes'`` and, for a ``Box``
             action space, ``'agent.std0'``.
         env: Environment whose action space selects the action head.
         device: Passed to the feature network and the action head.
         **kwargs: Forwarded to the parent constructor, the feature
             network and the action head.

     Raises:
         TypeError: If ``env.action_space`` is neither ``Discrete`` nor
             ``Box``.
     """
     super().__init__(config, env, device, **kwargs)

     self.feature_network = MLP(config, env, device, **kwargs)
     feature_dim = config['nn.sizes'][-1]
     if isinstance(env.action_space, Discrete):
         self.action_head = CategoricalHead(feature_dim, env.action_space.n, device, **kwargs)
     elif isinstance(env.action_space, Box):
         self.action_head = DiagGaussianHead(feature_dim, flatdim(env.action_space), device, config['agent.std0'], **kwargs)
     else:
         # Fail here instead of leaving ``self.action_head`` undefined,
         # which would only surface later as an AttributeError.
         raise TypeError(
             'Unsupported action space {}; expected Discrete or Box'.format(
                 type(env.action_space).__name__))
     self.total_timestep = 0
Ejemplo n.º 11
0
    def __init__(self, config, env, device, **kwargs):
        """Create the feature layers and apply ``ortho_init`` to each of them.

        Args:
            config: Mapping read for ``'nn.sizes'``; kept as ``self.config``.
            env: Environment; its observation space sizes the first layer.
            device: Target passed to ``self.to`` once the layers exist.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)
        self.config, self.env, self.device = config, env, device

        observation_dim = flatdim(env.observation_space)
        hidden_sizes = config['nn.sizes']
        self.feature_layers = make_fc(observation_dim, hidden_sizes)
        for fc_layer in self.feature_layers:
            ortho_init(fc_layer, nonlinearity='tanh', constant_bias=0.0)

        self.to(self.device)
Ejemplo n.º 12
0
    def __init__(self, config, env, device, **kwargs):
        """Create the feature layers and the action head for the action space.

        Args:
            config: Mapping read for ``'nn.sizes'`` and, for a ``Box``
                action space, ``'agent.std0'``; kept as ``self.config``.
            env: Environment; its observation space sizes the first
                layer and its action space selects the action head.
            device: Passed to the action head and to ``self.to``.
            **kwargs: Forwarded to the parent constructor and to the
                action head.

        Raises:
            TypeError: If ``env.action_space`` is neither ``Discrete``
                nor ``Box``.
        """
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        self.feature_layers = make_fc(flatdim(env.observation_space),
                                      config['nn.sizes'])
        for layer in self.feature_layers:
            ortho_init(layer, nonlinearity='tanh', constant_bias=0.0)

        feature_dim = config['nn.sizes'][-1]
        if isinstance(env.action_space, Discrete):
            self.action_head = CategoricalHead(feature_dim, env.action_space.n,
                                               device, **kwargs)
        elif isinstance(env.action_space, Box):
            self.action_head = DiagGaussianHead(feature_dim,
                                                flatdim(env.action_space),
                                                device, config['agent.std0'],
                                                **kwargs)
        else:
            # Fail here instead of leaving ``self.action_head`` undefined,
            # which would only surface later as an AttributeError.
            raise TypeError(
                'Unsupported action space {}; expected Discrete or Box'.format(
                    type(env.action_space).__name__))

        self.to(self.device)
Ejemplo n.º 13
0
    def __init__(self, config, env, device, **kwargs):
        """Create initialised feature layers and one ``LayerNorm`` per hidden size.

        Args:
            config: Mapping read for ``'nn.sizes'``; kept as ``self.config``.
            env: Environment; its observation space sizes the first layer.
            device: Target passed to ``self.to`` once the layers exist.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)
        self.config, self.env, self.device = config, env, device

        observation_dim = flatdim(env.observation_space)
        hidden_sizes = config['nn.sizes']
        self.feature_layers = make_fc(observation_dim, hidden_sizes)
        for fc_layer in self.feature_layers:
            ortho_init(fc_layer, nonlinearity='relu', constant_bias=0.0)

        norms = [nn.LayerNorm(width) for width in hidden_sizes]
        self.layer_norms = nn.ModuleList(norms)

        self.to(self.device)
Ejemplo n.º 14
0
    def __init__(self, config, env, device, **kwargs):
        """Create initialised feature layers and a single-output value head.

        Args:
            config: Mapping read for ``'nn.sizes'``; kept as ``self.config``.
            env: Environment; its observation space sizes the first layer.
            device: Target passed to ``self.to`` once the layers exist.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)
        self.config, self.env, self.device = config, env, device

        observation_dim = flatdim(env.observation_space)
        hidden_sizes = config['nn.sizes']
        self.feature_layers = make_fc(observation_dim, hidden_sizes)
        for fc_layer in self.feature_layers:
            ortho_init(fc_layer, nonlinearity='tanh', constant_bias=0.0)

        # The value head reads the output of the last hidden layer.
        last_width = hidden_sizes[-1]
        self.V_head = nn.Linear(last_width, 1)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)

        self.to(self.device)