def __init__(self, config, env, device, **kwargs):
    """Create feature layers over observation+action input and a 1-output head.

    Args:
        config: Configuration object; stored on the instance, not read here.
        env: Environment whose ``observation_space`` and ``action_space``
            determine the input width passed to ``make_fc``.
        device: Device the whole module is moved to once it is built.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device

    # Input width: flattened observation size plus flattened action size.
    obs_dim = flatdim(env.observation_space)
    action_dim = flatdim(env.action_space)
    self.feature_layers = make_fc(obs_dim + action_dim, [400, 300])

    # The head consumes 300 features (the last size given to make_fc)
    # and emits a single value.
    self.Q_head = nn.Linear(300, 1)

    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Create observation feature layers plus separate mean and log-std heads.

    Args:
        config: Configuration object; stored on the instance, not read here.
        env: Environment whose ``observation_space`` sets the input width and
            whose ``action_space`` sets the output width of both heads.
        device: Device the whole module is moved to once it is built.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device

    hidden_width = 256
    action_dim = flatdim(env.action_space)

    self.feature_layers = make_fc(flatdim(env.observation_space),
                                  [hidden_width, hidden_width])
    # Two linear heads of identical shape: one output per flattened action
    # dimension each.
    self.mean_head = nn.Linear(hidden_width, action_dim)
    self.logstd_head = nn.Linear(hidden_width, action_dim)

    self.to(device)
def __init__(self, config, env, device, **kwargs):
    """Create observation feature layers and a ``TanhDiagGaussianHead``.

    Args:
        config: Configuration object; stored on the instance, not read here.
        env: Environment whose ``observation_space`` sets the input width and
            whose ``action_space`` sets the size handed to the action head.
        device: Device passed to the action head and used for the final
            ``self.to(...)`` call.
        **kwargs: Forwarded to the parent constructor and to the action head.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device

    hidden_width = 256
    obs_dim = flatdim(env.observation_space)
    self.feature_layers = make_fc(obs_dim, [hidden_width, hidden_width])

    # The head is given the 256 feature width and the flattened action size.
    action_dim = flatdim(env.action_space)
    self.action_head = TanhDiagGaussianHead(hidden_width, action_dim, device, **kwargs)

    self.to(device)
def __init__(self, env, capacity, device):
    """Allocate zero-filled float32 storage arrays with ``capacity`` rows.

    Args:
        env: Environment whose ``observation_space`` and ``action_space``
            determine the per-row width of the observation and action arrays.
        capacity: Number of rows allocated in every storage array.
        device: Stored on the instance; not used during construction.
    """
    self.env = env
    self.capacity = capacity
    self.device = device

    obs_dim = flatdim(env.observation_space)
    action_dim = flatdim(env.action_space)

    # Two-dimensional arrays: one row per slot, one column per flattened dim.
    self.observations = np.zeros((capacity, obs_dim), dtype=np.float32)
    self.actions = np.zeros((capacity, action_dim), dtype=np.float32)
    self.next_observations = np.zeros((capacity, obs_dim), dtype=np.float32)

    # One-dimensional arrays: a single scalar per slot.
    self.rewards = np.zeros(capacity, dtype=np.float32)
    self.masks = np.zeros(capacity, dtype=np.float32)

    # Both counters start at zero.
    # NOTE(review): presumably `size` counts stored entries and `pointer` is
    # the next write index -- confirm against the methods that update them.
    self.size = 0
    self.pointer = 0
def __init__(self, config, env, device, **kwargs):
    """Create observation feature layers, an action head and ``max_action``.

    The action space must have bounds that are the same in every dimension
    and symmetric about zero (``low == -high``); ``max_action`` is then taken
    from ``env.action_space.high[0]``.

    Args:
        config: Configuration object; stored on the instance, not read here.
        env: Environment whose ``observation_space`` sets the input width and
            whose ``action_space`` sets the head's output width and bounds.
        device: Device the whole module is moved to once it is built.
        **kwargs: Forwarded unchanged to the parent constructor.

    Raises:
        AssertionError: If the upper or lower action bounds are not uniform
            across dimensions, or if they are not symmetric about zero.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device

    self.feature_layers = make_fc(flatdim(env.observation_space), [400, 300])
    self.action_head = nn.Linear(300, flatdim(env.action_space))

    unique_high = np.unique(env.action_space.high)
    unique_low = np.unique(env.action_space.low)
    assert unique_high.size == 1, (
        'action_space.high must hold a single value shared by all dimensions')
    # Check `low` for uniformity too: calling `.item()` on an array with more
    # than one element raises ValueError, which would hide the real problem.
    assert unique_low.size == 1, (
        'action_space.low must hold a single value shared by all dimensions')
    assert -unique_low.item() == unique_high.item(), (
        'action_space bounds must be symmetric about zero (low == -high)')
    self.max_action = env.action_space.high[0]

    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Create two independent feature stacks, each with its own 1-output head.

    Both stacks take an input whose width is the flattened observation size
    plus the flattened action size.

    Args:
        config: Configuration object; stored on the instance, not read here.
        env: Environment whose ``observation_space`` and ``action_space``
            determine the shared input width.
        device: Device the whole module is moved to once it is built.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device

    input_dim = flatdim(env.observation_space) + flatdim(env.action_space)
    hidden_width = 256

    # First Q branch.
    self.first_feature_layers = make_fc(input_dim, [hidden_width, hidden_width])
    self.first_Q_head = nn.Linear(hidden_width, 1)

    # Second Q branch: same architecture, separately created parameters.
    self.second_feature_layers = make_fc(input_dim, [hidden_width, hidden_width])
    self.second_Q_head = nn.Linear(hidden_width, 1)

    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Build the feature network, action head, value head and optimizer.

    A ``CategoricalHead`` is created for a ``Discrete`` action space and a
    ``DiagGaussianHead`` for a ``Box`` action space; for any other space no
    ``action_head`` attribute is set here.

    Args:
        config: Mapping read for ``'nn.sizes'``, ``'agent.std0'``,
            ``'agent.std_style'``, ``'agent.std_range'``, ``'agent.beta'``,
            ``'agent.lr'``, ``'agent.use_lr_scheduler'`` and
            ``'train.timestep'``.
        env: Environment whose ``action_space`` selects the action head.
        device: Device handed to the sub-modules and to the value head.
        **kwargs: Forwarded to the parent constructor and to the sub-modules.
    """
    super().__init__(config, env, device, **kwargs)

    # Width of the features fed to the heads: the last entry of 'nn.sizes'.
    feature_dim = config['nn.sizes'][-1]
    self.feature_network = MLP(config, env, device, **kwargs)

    action_space = env.action_space
    if isinstance(action_space, Discrete):
        self.action_head = CategoricalHead(feature_dim, action_space.n, device, **kwargs)
    elif isinstance(action_space, Box):
        self.action_head = DiagGaussianHead(
            feature_dim,
            flatdim(action_space),
            device,
            config['agent.std0'],
            config['agent.std_style'],
            config['agent.std_range'],
            config['agent.beta'],
            **kwargs
        )

    # Single-output value head, moved to the device explicitly and then
    # initialised through ortho_init.
    self.V_head = nn.Linear(feature_dim, 1).to(device)
    ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)

    self.total_timestep = 0

    # One Adam optimizer over every parameter registered on this module.
    learning_rate = config['agent.lr']
    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
    # `lr_scheduler` exists only when the config flag enables it.
    if config['agent.use_lr_scheduler']:
        self.lr_scheduler = linear_lr_scheduler(
            self.optimizer, config['train.timestep'], min_lr=1e-8)
def __init__(self, config, env, device, **kwargs):
    """Build actor, critic, target critic, ``log_alpha`` and their optimizers.

    Args:
        config: Mapping read for ``'agent.actor.lr'``, ``'agent.critic.lr'``
            and ``'agent.alpha0'``.
        env: Environment whose ``action_space`` sets ``target_entropy``.
        device: Device handed to the sub-modules and used for ``log_alpha``.
        **kwargs: Forwarded to the parent constructor and to the sub-modules.
    """
    super().__init__(config, env, device, **kwargs)

    self.actor = Actor(config, env, device, **kwargs)
    self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                      lr=config['agent.actor.lr'])

    self.critic = Critic(config, env, device, **kwargs)
    # The target critic starts as an exact copy of the critic's weights and
    # is switched to eval mode; it gets no optimizer of its own here.
    self.critic_target = Critic(config, env, device, **kwargs)
    self.critic_target.load_state_dict(self.critic.state_dict())
    self.critic_target.eval()
    self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                       lr=config['agent.critic.lr'])

    # Target entropy is the negated flattened action dimension, as a float.
    self.target_entropy = -float(flatdim(env.action_space))

    # log_alpha is a learnable scalar initialised to log(alpha0).
    initial_log_alpha = torch.tensor(np.log(config['agent.alpha0'])).to(device)
    self.log_alpha = nn.Parameter(initial_log_alpha)
    self.log_alpha_optimizer = optim.Adam([self.log_alpha], lr=1e-3)

    def _zero_all_grads():
        # Optimizers are looked up on `self` at call time, so replacing one
        # of them later is picked up. Returns the list of zero_grad() results.
        optimizers = [
            self.actor_optimizer,
            self.critic_optimizer,
            self.log_alpha_optimizer,
        ]
        return [optimizer.zero_grad() for optimizer in optimizers]

    self.optimizer_zero_grad = _zero_all_grads
def __init__(self, config, env, device, **kwargs):
    """Create observation feature layers and one ``LayerNorm`` per hidden size.

    Args:
        config: Mapping read for ``'nn.sizes'``, the list of hidden sizes.
        env: Environment whose ``observation_space`` sets the input width.
        device: Device the whole module is moved to once it is built.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device

    hidden_sizes = config['nn.sizes']
    self.feature_layers = make_fc(flatdim(env.observation_space), hidden_sizes)

    # One LayerNorm for each entry of 'nn.sizes', in the same order.
    norms = nn.ModuleList()
    for width in hidden_sizes:
        norms.append(nn.LayerNorm(width))
    self.layer_norms = norms

    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Build the feature network and an action head matching the action space.

    A ``CategoricalHead`` is created for a ``Discrete`` action space and a
    ``DiagGaussianHead`` for a ``Box`` action space; for any other space no
    ``action_head`` attribute is set here.

    Args:
        config: Mapping read for ``'nn.sizes'`` and ``'agent.std0'``.
        env: Environment whose ``action_space`` selects the action head.
        device: Device handed to the sub-modules.
        **kwargs: Forwarded to the parent constructor and to the sub-modules.
    """
    super().__init__(config, env, device, **kwargs)

    self.feature_network = MLP(config, env, device, **kwargs)

    # Width of the features fed to the head: the last entry of 'nn.sizes'.
    hidden_sizes = config['nn.sizes']
    feature_dim = hidden_sizes[-1]

    action_space = env.action_space
    if isinstance(action_space, Discrete):
        self.action_head = CategoricalHead(feature_dim, action_space.n, device, **kwargs)
    elif isinstance(action_space, Box):
        self.action_head = DiagGaussianHead(
            feature_dim,
            flatdim(action_space),
            device,
            config['agent.std0'],
            **kwargs
        )

    self.total_timestep = 0
def __init__(self, config, env, device, **kwargs):
    """Create observation feature layers and apply ``ortho_init`` to each.

    Args:
        config: Mapping read for ``'nn.sizes'``, the list of hidden sizes.
        env: Environment whose ``observation_space`` sets the input width.
        device: Device the whole module is moved to once it is built.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device

    obs_dim = flatdim(env.observation_space)
    hidden_sizes = config['nn.sizes']
    self.feature_layers = make_fc(obs_dim, hidden_sizes)

    # Every feature layer is initialised with the 'tanh' nonlinearity setting
    # and a zero constant bias.
    for fc_layer in self.feature_layers:
        ortho_init(fc_layer, nonlinearity='tanh', constant_bias=0.0)

    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Create initialised feature layers and an action head for the action space.

    A ``CategoricalHead`` is created for a ``Discrete`` action space and a
    ``DiagGaussianHead`` for a ``Box`` action space; for any other space no
    ``action_head`` attribute is set here.

    Args:
        config: Mapping read for ``'nn.sizes'`` and ``'agent.std0'``.
        env: Environment whose ``observation_space`` sets the input width and
            whose ``action_space`` selects the action head.
        device: Device handed to the action head and used for the final
            ``self.to(...)`` call.
        **kwargs: Forwarded to the parent constructor and to the action head.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device

    hidden_sizes = config['nn.sizes']
    self.feature_layers = make_fc(flatdim(env.observation_space), hidden_sizes)
    # Every feature layer is initialised with the 'tanh' nonlinearity setting
    # and a zero constant bias.
    for fc_layer in self.feature_layers:
        ortho_init(fc_layer, nonlinearity='tanh', constant_bias=0.0)

    # Width of the features fed to the head: the last entry of 'nn.sizes'.
    feature_dim = hidden_sizes[-1]
    action_space = env.action_space
    if isinstance(action_space, Discrete):
        self.action_head = CategoricalHead(feature_dim, action_space.n, device, **kwargs)
    elif isinstance(action_space, Box):
        self.action_head = DiagGaussianHead(
            feature_dim,
            flatdim(action_space),
            device,
            config['agent.std0'],
            **kwargs
        )

    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Create initialised feature layers and one ``LayerNorm`` per hidden size.

    Args:
        config: Mapping read for ``'nn.sizes'``, the list of hidden sizes.
        env: Environment whose ``observation_space`` sets the input width.
        device: Device the whole module is moved to once it is built.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device

    hidden_sizes = config['nn.sizes']
    obs_dim = flatdim(env.observation_space)

    self.feature_layers = make_fc(obs_dim, hidden_sizes)
    # Every feature layer is initialised with the 'relu' nonlinearity setting
    # and a zero constant bias.
    for fc_layer in self.feature_layers:
        ortho_init(fc_layer, nonlinearity='relu', constant_bias=0.0)

    # One LayerNorm for each entry of 'nn.sizes', in the same order.
    per_layer_norms = [nn.LayerNorm(width) for width in hidden_sizes]
    self.layer_norms = nn.ModuleList(per_layer_norms)

    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Create initialised feature layers and a single-output ``V_head``.

    Args:
        config: Mapping read for ``'nn.sizes'``, the list of hidden sizes.
        env: Environment whose ``observation_space`` sets the input width.
        device: Device the whole module is moved to once it is built.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device

    hidden_sizes = config['nn.sizes']
    self.feature_layers = make_fc(flatdim(env.observation_space), hidden_sizes)
    # Every feature layer is initialised with the 'tanh' nonlinearity setting
    # and a zero constant bias.
    for fc_layer in self.feature_layers:
        ortho_init(fc_layer, nonlinearity='tanh', constant_bias=0.0)

    # The value head takes the last hidden width and emits one value; it is
    # initialised with weight scale 1.0 and zero bias.
    feature_dim = hidden_sizes[-1]
    self.V_head = nn.Linear(feature_dim, 1)
    ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)

    self.to(self.device)