コード例 #1
0
    def _init_network(self):
        """Build actor/critic networks, their target copies, and optimizers.

        Each target network is constructed from the same backbone/head configs
        as its online counterpart and then overwritten with the online
        network's state dict. Saved parameters are restored at the end when
        ``self.args.load_from`` is not None.
        """
        device = self.device

        # online actor and its target copy
        actor_cfgs = (self.backbone_cfg.actor, self.head_cfg.actor)
        self.actor = Brain(*actor_cfgs).to(device)
        self.actor_target = Brain(*actor_cfgs).to(device)
        self.actor_target.load_state_dict(self.actor.state_dict())

        # online critic and its target copy
        critic_cfgs = (self.backbone_cfg.critic, self.head_cfg.critic)
        self.critic = Brain(*critic_cfgs).to(device)
        self.critic_target = Brain(*critic_cfgs).to(device)
        self.critic_target.load_state_dict(self.critic.state_dict())

        # one Adam optimizer per online network; targets are not optimized here
        optim_cfg = self.optim_cfg
        self.actor_optim = optim.Adam(
            self.actor.parameters(),
            lr=optim_cfg.lr_actor,
            weight_decay=optim_cfg.weight_decay,
        )
        self.critic_optim = optim.Adam(
            self.critic.parameters(),
            lr=optim_cfg.lr_critic,
            weight_decay=optim_cfg.weight_decay,
        )

        # restore optimizer and model parameters from a checkpoint, if given
        checkpoint = self.args.load_from
        if checkpoint is not None:
            self.load_params(checkpoint)
コード例 #2
0
ファイル: learner.py プロジェクト: medipixel/rl_algorithms
    def _init_network(self):
        """Build actor, critic, and discriminator networks plus optimizers.

        When ``backbone_cfg.shared_actor_critic`` is truthy, a single backbone
        is built from it and handed to both the actor and the critic;
        otherwise each gets its own backbone config. Saved parameters are
        restored at the end when ``self.load_from`` is not None.
        """
        device = self.device
        backbone_cfg = self.backbone_cfg
        head_cfg = self.head_cfg

        shared_cfg = backbone_cfg.shared_actor_critic
        if not shared_cfg:
            # independent backbones for actor and critic
            self.actor = Brain(backbone_cfg.actor, head_cfg.actor).to(device)
            self.critic = Brain(backbone_cfg.critic, head_cfg.critic).to(device)
        else:
            # one backbone instance passed to both networks
            shared_backbone = build_backbone(shared_cfg)
            actor = Brain(shared_cfg, head_cfg.actor, shared_backbone)
            critic = Brain(shared_cfg, head_cfg.critic, shared_backbone)
            self.actor = actor.to(device)
            self.critic = critic.to(device)

        # NOTE(review): "aciton_embedder" is the attribute name used here as-is;
        # it has to match the key in the head config -- confirm before renaming.
        discriminator = Discriminator(
            backbone_cfg.discriminator,
            head_cfg.discriminator,
            head_cfg.aciton_embedder,
        )
        self.discriminator = discriminator.to(device)

        # one Adam optimizer per network, all sharing the same weight decay
        optim_cfg = self.optim_cfg
        self.actor_optim = optim.Adam(
            self.actor.parameters(),
            lr=optim_cfg.lr_actor,
            weight_decay=optim_cfg.weight_decay,
        )
        self.critic_optim = optim.Adam(
            self.critic.parameters(),
            lr=optim_cfg.lr_critic,
            weight_decay=optim_cfg.weight_decay,
        )
        self.discriminator_optim = optim.Adam(
            self.discriminator.parameters(),
            lr=optim_cfg.lr_discriminator,
            weight_decay=optim_cfg.weight_decay,
        )

        # restore model parameters from a checkpoint, if given
        checkpoint = self.load_from
        if checkpoint is not None:
            self.load_params(checkpoint)
コード例 #3
0
def test_brain():
    """Check the input size of the head that Brain builds on the backbone."""
    # The head config is edited in place before the model is constructed.
    configs = head_cfg.configs
    configs.state_size = test_state_dim
    configs.output_size = 8

    brain = Brain(resnet_cfg, head_cfg)

    expected_input_size = 16384
    assert brain.head.input_size == expected_input_size
コード例 #4
0
ファイル: learner.py プロジェクト: zivzone/rl_algorithms
    def _init_network(self):
        """Build the actor, value function, two Q-functions, and optimizers.

        The value function also gets a target copy that starts with the same
        state dict. Saved parameters are restored at the end when
        ``self.args.load_from`` is not None.
        """
        device = self.device
        backbone_cfg = self.backbone_cfg
        head_cfg = self.head_cfg

        # actor
        self.actor = Brain(backbone_cfg.actor, head_cfg.actor).to(device)

        # value function and its target copy
        vf_cfgs = (backbone_cfg.critic_vf, head_cfg.critic_vf)
        self.vf = Brain(*vf_cfgs).to(device)
        self.vf_target = Brain(*vf_cfgs).to(device)
        self.vf_target.load_state_dict(self.vf.state_dict())

        # two Q-functions built from the same configs
        qf_cfgs = (backbone_cfg.critic_qf, head_cfg.critic_qf)
        self.qf_1 = Brain(*qf_cfgs).to(device)
        self.qf_2 = Brain(*qf_cfgs).to(device)

        # one Adam optimizer per trained network, each with its own lr
        optim_cfg = self.optim_cfg
        self.actor_optim = optim.Adam(
            self.actor.parameters(),
            lr=optim_cfg.lr_actor,
            weight_decay=optim_cfg.weight_decay,
        )
        self.vf_optim = optim.Adam(
            self.vf.parameters(),
            lr=optim_cfg.lr_vf,
            weight_decay=optim_cfg.weight_decay,
        )
        self.qf_1_optim = optim.Adam(
            self.qf_1.parameters(),
            lr=optim_cfg.lr_qf1,
            weight_decay=optim_cfg.weight_decay,
        )
        self.qf_2_optim = optim.Adam(
            self.qf_2.parameters(),
            lr=optim_cfg.lr_qf2,
            weight_decay=optim_cfg.weight_decay,
        )

        # restore optimizer and model parameters from a checkpoint, if given
        checkpoint = self.args.load_from
        if checkpoint is not None:
            self.load_params(checkpoint)
コード例 #5
0
ファイル: learner.py プロジェクト: zivzone/rl_algorithms
    def _init_network(self):
        """Build the DQN, its target copy, the loss function, and optimizer.

        The target network is constructed from the same configs and then
        overwritten with the online network's state dict. Saved parameters are
        restored at the end when ``self.args.load_from`` is not None.
        """
        device = self.device
        net_cfgs = (self.backbone_cfg, self.head_cfg)

        self.dqn = Brain(*net_cfgs).to(device)
        self.dqn_target = Brain(*net_cfgs).to(device)
        self.loss_fn = build_loss(self.loss_type)

        # start the target network from the online network's weights
        self.dqn_target.load_state_dict(self.dqn.state_dict())

        # Adam optimizer over the online network only
        optim_cfg = self.optim_cfg
        self.dqn_optim = optim.Adam(
            self.dqn.parameters(),
            lr=optim_cfg.lr_dqn,
            weight_decay=optim_cfg.weight_decay,
            eps=optim_cfg.adam_eps,
        )

        # restore optimizer and model parameters from a checkpoint, if given
        checkpoint = self.args.load_from
        if checkpoint is not None:
            self.load_params(checkpoint)
コード例 #6
0
ファイル: learner.py プロジェクト: medipixel/rl_algorithms
    def _init_network(self):
        """Build actor and critic, their optimizers, and an actor target copy.

        Both optimizers use the same ``lr`` and ``adam_eps`` settings. The
        actor target is created after the optimizers and starts from the
        actor's state dict. Saved parameters are restored at the end when
        ``self.load_from`` is not None.
        """
        device = self.device
        actor_cfgs = (self.backbone_cfg.actor, self.head_cfg.actor)

        self.actor = Brain(*actor_cfgs).to(device)
        self.critic = Brain(self.backbone_cfg.critic, self.head_cfg.critic).to(device)

        # identical Adam settings for both networks
        adam_kwargs = {"lr": self.optim_cfg.lr, "eps": self.optim_cfg.adam_eps}
        self.actor_optim = optim.Adam(self.actor.parameters(), **adam_kwargs)
        self.critic_optim = optim.Adam(self.critic.parameters(), **adam_kwargs)

        # target copy of the actor, synchronised with the online weights
        self.actor_target = Brain(*actor_cfgs).to(device)
        self.actor_target.load_state_dict(self.actor.state_dict())

        checkpoint = self.load_from
        if checkpoint is not None:
            self.load_params(checkpoint)
コード例 #7
0
ファイル: learner.py プロジェクト: singmeasong/rl_algorithms
    def _init_network(self):
        """Build the actor and critic networks and one optimizer for each.

        Saved parameters are restored at the end when ``self.load_from`` is
        not None.
        """
        device = self.device
        backbone_cfg = self.backbone_cfg
        head_cfg = self.head_cfg

        self.actor = Brain(backbone_cfg.actor, head_cfg.actor).to(device)
        self.critic = Brain(backbone_cfg.critic, head_cfg.critic).to(device)

        # separate learning rates, shared weight decay
        optim_cfg = self.optim_cfg
        self.actor_optim = optim.Adam(
            self.actor.parameters(),
            lr=optim_cfg.lr_actor,
            weight_decay=optim_cfg.weight_decay,
        )
        self.critic_optim = optim.Adam(
            self.critic.parameters(),
            lr=optim_cfg.lr_critic,
            weight_decay=optim_cfg.weight_decay,
        )

        checkpoint = self.load_from
        if checkpoint is not None:
            self.load_params(checkpoint)
コード例 #8
0
    def __init__(
        self,
        args: argparse.Namespace,
        env_info: ConfigDict,
        log_cfg: ConfigDict,
        comm_cfg: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
    ):
        """Store the configs, build a CPU-resident Brain, and set up the env.

        Args:
            args: parsed command-line namespace, stored as ``self.args``.
            env_info: environment config, stored as ``self.env_info``.
            log_cfg: logging config, stored as ``self.log_cfg``.
            comm_cfg: communication config, stored as ``self.comm_cfg``.
            backbone: backbone config passed to ``Brain``.
            head: head config passed to ``Brain``.
        """
        self.args, self.env_info = args, env_info
        self.log_cfg, self.comm_cfg = log_cfg, comm_cfg

        # Logger only runs on cpu
        cpu = torch.device("cpu")
        self.device = cpu
        self.brain = Brain(backbone, head).to(cpu)

        # bookkeeping: update counter and a bounded queue (at most 100 entries)
        self.update_step = 0
        self.log_info_queue = deque(maxlen=100)

        self._init_env()
コード例 #9
0
    def __init__(
        self,
        log_cfg: ConfigDict,
        comm_cfg: ConfigDict,
        backbone: ConfigDict,
        head: ConfigDict,
        env_name: str,
        is_atari: bool,
        state_size: int,
        output_size: int,
        max_update_step: int,
        episode_num: int,
        max_episode_steps: int,
        interim_test_num: int,
        is_log: bool,
        is_render: bool,
    ):
        """Store the settings, build a CPU-resident Brain, and set up the env.

        ``state_size`` and ``output_size`` are written into ``head.configs``
        (the passed-in config is modified in place) before the Brain is built.
        Apart from ``backbone`` and ``head``, which are only passed to
        ``Brain``, the other arguments are stored on the instance under the
        same names.
        """
        self.log_cfg, self.comm_cfg = log_cfg, comm_cfg

        # Logger only runs on cpu
        cpu = torch.device("cpu")
        self.device = cpu

        # fill in the head dimensions, then build the network
        head_configs = head.configs
        head_configs.state_size = state_size
        head_configs.output_size = output_size
        self.brain = Brain(backbone, head).to(cpu)

        # environment and run settings
        self.env_name, self.is_atari = env_name, is_atari
        self.max_update_step = max_update_step
        self.episode_num = episode_num
        self.max_episode_steps = max_episode_steps
        self.interim_test_num = interim_test_num
        self.is_log, self.is_render = is_log, is_render

        # bookkeeping: update counter and a bounded queue (at most 100 entries)
        self.update_step = 0
        self.log_info_queue = deque(maxlen=100)

        self._init_env()
コード例 #10
0
 def _init_networks(self, state_dict: OrderedDict):
     """Build the DQN, load the given state dict, and switch to eval mode.

     Args:
         state_dict: parameters to load into the freshly built network.
     """
     policy = Brain(self.backbone_cfg, self.head_cfg).to(self.device)
     self.dqn = policy
     policy.load_state_dict(state_dict)
     policy.eval()