def _init_network(self):
    """Create the actor/critic networks, their targets, and both optimizers.

    Each target network is built from the same configs as its online
    counterpart and then overwritten with the online network's state dict.
    When ``self.args.load_from`` is not None it is handed to
    ``self.load_params`` as the final step.
    """

    def build(backbone_cfg, head_cfg):
        # every network is constructed the same way and moved to self.device
        return Brain(backbone_cfg, head_cfg).to(self.device)

    def adam(network, lr):
        # both optimizers use the same configured weight decay
        return optim.Adam(
            network.parameters(),
            lr=lr,
            weight_decay=self.optim_cfg.weight_decay,
        )

    # actor and its target copy
    self.actor = build(self.backbone_cfg.actor, self.head_cfg.actor)
    self.actor_target = build(self.backbone_cfg.actor, self.head_cfg.actor)
    self.actor_target.load_state_dict(self.actor.state_dict())

    # critic and its target copy
    self.critic = build(self.backbone_cfg.critic, self.head_cfg.critic)
    self.critic_target = build(self.backbone_cfg.critic, self.head_cfg.critic)
    self.critic_target.load_state_dict(self.critic.state_dict())

    # one optimizer per online network
    self.actor_optim = adam(self.actor, self.optim_cfg.lr_actor)
    self.critic_optim = adam(self.critic, self.optim_cfg.lr_critic)

    # optionally restore the optimizer and model parameters
    checkpoint = self.args.load_from
    if checkpoint is not None:
        self.load_params(checkpoint)
def _init_network(self):
    """Create the actor, critic and discriminator networks and their optimizers.

    If ``self.backbone_cfg.shared_actor_critic`` is truthy, a single backbone
    is built from it and passed to both the actor and the critic; otherwise
    each of them is built from its own backbone config. When
    ``self.load_from`` is not None it is handed to ``self.load_params`` as
    the final step.
    """

    def adam(network, lr):
        # all three optimizers use the same configured weight decay
        return optim.Adam(
            network.parameters(),
            lr=lr,
            weight_decay=self.optim_cfg.weight_decay,
        )

    shared_cfg = self.backbone_cfg.shared_actor_critic
    if shared_cfg:
        shared_backbone = build_backbone(shared_cfg)
        actor = Brain(shared_cfg, self.head_cfg.actor, shared_backbone)
        critic = Brain(shared_cfg, self.head_cfg.critic, shared_backbone)
        self.actor = actor
        self.critic = critic
        # both networks are constructed before either is moved to the device
        self.actor = self.actor.to(self.device)
        self.critic = self.critic.to(self.device)
    else:
        self.actor = Brain(
            self.backbone_cfg.actor, self.head_cfg.actor
        ).to(self.device)
        self.critic = Brain(
            self.backbone_cfg.critic, self.head_cfg.critic
        ).to(self.device)

    discriminator = Discriminator(
        self.backbone_cfg.discriminator,
        self.head_cfg.discriminator,
        self.head_cfg.aciton_embedder,
    )
    self.discriminator = discriminator.to(self.device)

    # one optimizer per network
    self.actor_optim = adam(self.actor, self.optim_cfg.lr_actor)
    self.critic_optim = adam(self.critic, self.optim_cfg.lr_critic)
    self.discriminator_optim = adam(
        self.discriminator, self.optim_cfg.lr_discriminator
    )

    # optionally restore model parameters
    checkpoint = self.load_from
    if checkpoint is not None:
        self.load_params(checkpoint)
def test_brain():
    """Test whether Brain makes its fc layer based on the backbone's output size.

    Sets the state and output sizes on ``head_cfg``, builds a Brain from
    ``resnet_cfg`` and ``head_cfg``, and checks the head's reported input size.
    """
    expected_head_input_size = 16384

    head_cfg.configs.state_size = test_state_dim
    head_cfg.configs.output_size = 8

    brain = Brain(resnet_cfg, head_cfg)
    assert brain.head.input_size == expected_head_input_size
def _init_network(self):
    """Create the actor, the value/Q critics, and one optimizer per trained network.

    The value-function target is built from the same configs as ``self.vf``
    and then overwritten with its state dict. When ``self.args.load_from`` is
    not None it is handed to ``self.load_params`` as the final step.
    """

    def build(backbone_cfg, head_cfg):
        # every network is constructed the same way and moved to self.device
        return Brain(backbone_cfg, head_cfg).to(self.device)

    def adam(network, lr):
        # all optimizers use the same configured weight decay
        return optim.Adam(
            network.parameters(),
            lr=lr,
            weight_decay=self.optim_cfg.weight_decay,
        )

    # actor
    self.actor = build(self.backbone_cfg.actor, self.head_cfg.actor)

    # v critic and its target copy
    self.vf = build(self.backbone_cfg.critic_vf, self.head_cfg.critic_vf)
    self.vf_target = build(self.backbone_cfg.critic_vf, self.head_cfg.critic_vf)
    self.vf_target.load_state_dict(self.vf.state_dict())

    # two q critics built from the same configs
    self.qf_1 = build(self.backbone_cfg.critic_qf, self.head_cfg.critic_qf)
    self.qf_2 = build(self.backbone_cfg.critic_qf, self.head_cfg.critic_qf)

    # optimizers, each with its own learning rate
    self.actor_optim = adam(self.actor, self.optim_cfg.lr_actor)
    self.vf_optim = adam(self.vf, self.optim_cfg.lr_vf)
    self.qf_1_optim = adam(self.qf_1, self.optim_cfg.lr_qf1)
    self.qf_2_optim = adam(self.qf_2, self.optim_cfg.lr_qf2)

    # optionally restore the optimizer and model parameters
    checkpoint = self.args.load_from
    if checkpoint is not None:
        self.load_params(checkpoint)
def _init_network(self):
    """Create the DQN, its target network, the loss function and the optimizer.

    The target network is built from the same configs as ``self.dqn`` and
    then overwritten with its state dict. When ``self.args.load_from`` is not
    None it is handed to ``self.load_params`` as the final step.
    """

    def build():
        # online and target networks are constructed identically
        return Brain(self.backbone_cfg, self.head_cfg).to(self.device)

    self.dqn = build()
    self.dqn_target = build()
    # start the target from the online network's weights
    self.dqn_target.load_state_dict(self.dqn.state_dict())

    self.loss_fn = build_loss(self.loss_type)

    # optimizer for the online network only
    optim_cfg = self.optim_cfg
    self.dqn_optim = optim.Adam(
        self.dqn.parameters(),
        lr=optim_cfg.lr_dqn,
        weight_decay=optim_cfg.weight_decay,
        eps=optim_cfg.adam_eps,
    )

    # optionally restore the optimizer and model parameters
    checkpoint = self.args.load_from
    if checkpoint is not None:
        self.load_params(checkpoint)
def _init_network(self):
    """Create the actor, the critic, their optimizers and an actor target.

    Both optimizers use the same learning rate and Adam epsilon from
    ``self.optim_cfg``. The actor target is built from the actor's configs
    and then overwritten with the actor's state dict. When ``self.load_from``
    is not None it is handed to ``self.load_params`` as the final step.
    """

    def build(backbone_cfg, head_cfg):
        # every network is constructed the same way and moved to self.device
        return Brain(backbone_cfg, head_cfg).to(self.device)

    def adam(network):
        return optim.Adam(
            network.parameters(),
            lr=self.optim_cfg.lr,
            eps=self.optim_cfg.adam_eps,
        )

    self.actor = build(self.backbone_cfg.actor, self.head_cfg.actor)
    self.critic = build(self.backbone_cfg.critic, self.head_cfg.critic)

    # one optimizer per online network
    self.actor_optim = adam(self.actor)
    self.critic_optim = adam(self.critic)

    # target copy of the actor, synchronized with the online weights
    self.actor_target = build(self.backbone_cfg.actor, self.head_cfg.actor)
    self.actor_target.load_state_dict(self.actor.state_dict())

    checkpoint = self.load_from
    if checkpoint is not None:
        self.load_params(checkpoint)
def _init_network(self):
    """Create the actor and critic networks and an Adam optimizer for each.

    When ``self.load_from`` is not None it is handed to ``self.load_params``
    as the final step.
    """
    device = self.device
    optim_cfg = self.optim_cfg

    actor = Brain(self.backbone_cfg.actor, self.head_cfg.actor)
    self.actor = actor.to(device)
    critic = Brain(self.backbone_cfg.critic, self.head_cfg.critic)
    self.critic = critic.to(device)

    # separate learning rates, shared weight decay
    self.actor_optim = optim.Adam(
        self.actor.parameters(),
        lr=optim_cfg.lr_actor,
        weight_decay=optim_cfg.weight_decay,
    )
    self.critic_optim = optim.Adam(
        self.critic.parameters(),
        lr=optim_cfg.lr_critic,
        weight_decay=optim_cfg.weight_decay,
    )

    checkpoint = self.load_from
    if checkpoint is not None:
        self.load_params(checkpoint)
def __init__(
    self,
    args: argparse.Namespace,
    env_info: ConfigDict,
    log_cfg: ConfigDict,
    comm_cfg: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
):
    """Store the given configs, build a CPU Brain, and initialize the environment.

    Args:
        args: parsed command-line arguments, stored as ``self.args``.
        env_info: environment information, stored as ``self.env_info``.
        log_cfg: logging configuration, stored as ``self.log_cfg``.
        comm_cfg: communication configuration, stored as ``self.comm_cfg``.
        backbone: backbone config passed to ``Brain``.
        head: head config passed to ``Brain``.
    """
    # keep references to everything handed in
    self.args, self.env_info = args, env_info
    self.log_cfg, self.comm_cfg = log_cfg, comm_cfg

    # Logger only runs on cpu
    cpu = torch.device("cpu")
    self.device = cpu
    self.brain = Brain(backbone, head).to(cpu)

    # bookkeeping: update counter and a queue holding at most 100 entries
    self.update_step = 0
    self.log_info_queue = deque(maxlen=100)

    self._init_env()
def __init__(
    self,
    log_cfg: ConfigDict,
    comm_cfg: ConfigDict,
    backbone: ConfigDict,
    head: ConfigDict,
    env_name: str,
    is_atari: bool,
    state_size: int,
    output_size: int,
    max_update_step: int,
    episode_num: int,
    max_episode_steps: int,
    interim_test_num: int,
    is_log: bool,
    is_render: bool,
):
    """Store the settings, build a CPU Brain, and initialize the environment.

    ``state_size`` and ``output_size`` are written into ``head.configs``
    before the Brain is built, so the caller's ``head`` object is modified.
    All remaining arguments are stored on the instance under their own names.
    """
    # Logger only runs on cpu
    cpu = torch.device("cpu")
    self.device = cpu
    self.log_cfg, self.comm_cfg = log_cfg, comm_cfg

    # the head config must carry the sizes before the network is constructed
    head.configs.state_size = state_size
    head.configs.output_size = output_size
    self.brain = Brain(backbone, head).to(cpu)

    # environment settings
    self.env_name, self.is_atari = env_name, is_atari

    # run limits
    self.max_update_step = max_update_step
    self.episode_num = episode_num
    self.max_episode_steps = max_episode_steps
    self.interim_test_num = interim_test_num

    # output switches
    self.is_log, self.is_render = is_log, is_render

    # bookkeeping: update counter and a queue holding at most 100 entries
    self.update_step = 0
    self.log_info_queue = deque(maxlen=100)

    self._init_env()
def _init_networks(self, state_dict: OrderedDict):
    """Build the DQN policy, load the given state dict, and switch it to eval mode.

    Args:
        state_dict: parameters to load into the newly built network
            (per the original docstring, the learner's state dict).
    """
    policy = Brain(self.backbone_cfg, self.head_cfg).to(self.device)
    self.dqn = policy
    policy.load_state_dict(state_dict)
    policy.eval()