def _initialize(self): """Initialize networks, optimizer, loss function.""" # Set env-specific input dims and output dims for models self.model_cfg.critic.params.model_cfg.state_dim = ( self.model_cfg.actor.params.model_cfg.state_dim ) = self.experiment_info.env.state_dim self.model_cfg.critic.params.model_cfg.action_dim = ( self.model_cfg.actor.params.model_cfg.action_dim ) = self.experiment_info.env.action_dim # Initialize critic models, optimizers, and loss function self.critic = build_model(self.model_cfg.critic, self.device) self.critic_optimizer = optim.Adam( self.critic.parameters(), lr=self.hyper_params.critic_learning_rate) self.critic_loss_fn = build_loss( self.experiment_info.critic_loss, self.hyper_params, self.experiment_info.device, ) # Initialize actor model, optimizer, and loss function self.actor = build_model(self.model_cfg.actor, self.device) self.actor_optimizer = optim.Adam( self.actor.parameters(), lr=self.hyper_params.actor_learning_rate) self.actor_loss_fn = build_loss( self.experiment_info.actor_loss, self.hyper_params, self.experiment_info.device, )
def _initialize(self): """initialize networks, optimizer, loss function""" self.network = build_model(self.model_cfg, self.device) self.target_network = build_model(self.model_cfg, self.device) hard_update(self.network, self.target_network) self.optimizer = optim.Adam(self.network.parameters(), lr=self.hyper_params.learning_rate) self.loss_fn = build_loss(self.experiment_info.loss, self.hyper_params, self.experiment_info.device)
def _initialize(self): """initialize networks, optimizer, loss function, alpha (entropy temperature)""" # Set env-specific input dims and output dims for models self.model_cfg.critic.params.model_cfg.state_dim = ( self.model_cfg.actor.params.model_cfg.state_dim ) = self.experiment_info.env.state_dim self.model_cfg.critic.params.model_cfg.action_dim = ( self.model_cfg.actor.params.model_cfg.action_dim ) = self.experiment_info.env.action_dim # Initialize critic models, optimizers, and loss function self.critic1 = build_model(self.model_cfg.critic, self.device) self.target_critic1 = build_model(self.model_cfg.critic, self.device) self.critic2 = build_model(self.model_cfg.critic, self.device) self.target_critic2 = build_model(self.model_cfg.critic, self.device) self.critic1_optimizer = optim.Adam( self.critic1.parameters(), lr=self.hyper_params.critic_learning_rate ) self.critic2_optimizer = optim.Adam( self.critic2.parameters(), lr=self.hyper_params.critic_learning_rate ) self.critic_loss_fn = build_loss( self.experiment_info.critic_loss, self.hyper_params, self.experiment_info.device, ) hard_update(self.critic1, self.target_critic1) hard_update(self.critic2, self.target_critic2) # Initialize actor model, optimizer, and loss function self.actor = build_model(self.model_cfg.actor, self.device) self.actor_optimizer = optim.Adam( self.actor.parameters(), lr=self.hyper_params.actor_learning_rate ) self.actor_loss_fn = build_loss( self.experiment_info.actor_loss, self.hyper_params, self.experiment_info.device, ) # entropy temperature self.alpha = self.hyper_params.alpha self.target_entropy = -torch.prod( torch.Tensor(self.experiment_info.env.action_dim).to(self.device) ).item() self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device) self.alpha_optim = optim.Adam( [self.log_alpha], lr=self.hyper_params.alpha_learning_rate )
def _initialize(self): """initialize networks, optimizer, loss function""" self.network = build_model(self.model_cfg, self.use_cuda) self.target_network = build_model(self.model_cfg, self.use_cuda) hard_update(self.network, self.target_network) self.optimizer = optim.Adam( self.network.parameters(), lr=self.hyper_params.learning_rate, weight_decay=self.hyper_params.weight_decay, eps=self.hyper_params.adam_eps, ) self.loss_fn = build_loss(self.experiment_info.loss, self.hyper_params, self.use_cuda)
def __init__(
    self,
    worker: TrajectoryRolloutWorker,
    hyper_params: DictConfig,
    model_cfg: DictConfig,
):
    self.worker = worker
    self.hyper_params = hyper_params
    self.model_cfg = model_cfg

    # Build critic
    self.critic = build_model(self.model_cfg.critic, self.worker.device)

    # Build loss functions
    self.critic_loss_fn = build_loss(
        self.worker.experiment_info.critic_loss,
        self.hyper_params,
        self.worker.experiment_info.device,
    )
    self.actor_loss_fn = build_loss(
        self.worker.experiment_info.actor_loss,
        self.hyper_params,
        self.worker.experiment_info.device,
    )
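
# Hedged wiring sketch: this learner builds only the critic, while the actor
# is built by the rollout worker (see the worker constructor below, assumed
# here to be TrajectoryRolloutWorker's). The learner class name
# `OnPolicyLearner` and the config attribute paths are assumptions:
worker = TrajectoryRolloutWorker(
    rank=0, experiment_info=cfg.experiment_info, policy_cfg=cfg.model.actor
)
learner = OnPolicyLearner(
    worker=worker, hyper_params=cfg.hyper_params, model_cfg=cfg.model
)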
def __init__(self, rank: int, experiment_info: DictConfig, policy_cfg: DictConfig):
    self.experiment_info = experiment_info
    self.rank = rank
    self.env = build_env(experiment_info)
    self.action_selector = build_action_selector(self.experiment_info)
    self.device = torch.device(self.experiment_info.worker_device)
    self.actor = build_model(policy_cfg, self.device)
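
# Hedged sketch of a single rollout step this worker could run. The action
# selector's call signature and the older Gym 4-tuple `step` return are both
# assumptions, not confirmed by this excerpt:
state = self.env.reset()
state_t = torch.FloatTensor(state).unsqueeze(0).to(self.device)
action = self.action_selector(self.actor, state_t)
next_state, reward, done, info = self.env.step(action)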
def main(cfg: DictConfig):
    # Print the full config
    print(cfg.pretty())

    # Build env
    print("===INITIALIZING ENV===")
    env = build_env(cfg.experiment_info)
    print(env.reset())
    print("=================")

    # Build model
    print("===INITIALIZING MODEL===")
    cfg.model.params.model_cfg.state_dim = env.observation_space.shape
    cfg.model.params.model_cfg.action_dim = env.action_space.n
    cfg.model.params.model_cfg.fc.output.params.output_size = env.action_space.n
    model = build_model(cfg.model)
    test_input = torch.FloatTensor(env.reset()).unsqueeze(0)
    print(model)
    print(model(test_input))
    print("===================")

    # Build action selector
    print("===INITIALIZING ACTION SELECTOR===")
    action_selector = build_action_selector(cfg.experiment_info)
    print(action_selector)
    print("==============================")

    # Build loss
    print("===INITIALIZING LOSS===")
    loss = build_loss(cfg.experiment_info)
    print(loss)
    print("==================")

    # Build learner
    print("===INITIALIZING LEARNER===")
    learner = build_learner(**cfg)
    print(learner)
    print("=====================")

    # Build agent
    print("===INITIALIZING AGENT===")
    agent = build_agent(**cfg)
    print(agent)
    print("=====================")
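
# Hedged sketch of the usual Hydra entry point for a smoke test like `main`
# above (the config path and name are assumptions). Note: on OmegaConf >= 2.0,
# `cfg.pretty()` was replaced by `OmegaConf.to_yaml(cfg)`.
import hydra
from omegaconf import DictConfig

@hydra.main(config_path="configs", config_name="config")
def main(cfg: DictConfig) -> None:
    ...

if __name__ == "__main__":
    main()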