Example #1
    def _initialize(self):
        """Initialize networks, optimizer, loss function."""
        # Set env-specific input dims and output dims for models

        self.model_cfg.critic.params.model_cfg.state_dim = (
            self.model_cfg.actor.params.model_cfg.state_dim
        ) = self.experiment_info.env.state_dim
        self.model_cfg.critic.params.model_cfg.action_dim = (
            self.model_cfg.actor.params.model_cfg.action_dim
        ) = self.experiment_info.env.action_dim

        # Initialize critic models, optimizers, and loss function
        self.critic = build_model(self.model_cfg.critic, self.device)

        self.critic_optimizer = optim.Adam(
            self.critic.parameters(),
            lr=self.hyper_params.critic_learning_rate)
        self.critic_loss_fn = build_loss(
            self.experiment_info.critic_loss,
            self.hyper_params,
            self.experiment_info.device,
        )

        # Initialize actor model, optimizer, and loss function
        self.actor = build_model(self.model_cfg.actor, self.device)
        self.actor_optimizer = optim.Adam(
            self.actor.parameters(), lr=self.hyper_params.actor_learning_rate)
        self.actor_loss_fn = build_loss(
            self.experiment_info.actor_loss,
            self.hyper_params,
            self.experiment_info.device,
        )
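
The chained assignment at the top of this example is easy to misread: Python evaluates the right-hand side once and then binds it to each target, so both the critic and actor configs receive the same env dimensions. An equivalent explicit form, purely for illustration:

state_dim = self.experiment_info.env.state_dim
self.model_cfg.critic.params.model_cfg.state_dim = state_dim
self.model_cfg.actor.params.model_cfg.state_dim = state_dim

action_dim = self.experiment_info.env.action_dim
self.model_cfg.critic.params.model_cfg.action_dim = action_dim
self.model_cfg.actor.params.model_cfg.action_dim = action_dim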
Example #2
    def _initialize(self):
        """initialize networks, optimizer, loss function"""
        self.network = build_model(self.model_cfg, self.device)
        self.target_network = build_model(self.model_cfg, self.device)
        hard_update(self.network, self.target_network)

        self.optimizer = optim.Adam(self.network.parameters(),
                                    lr=self.hyper_params.learning_rate)

        self.loss_fn = build_loss(self.experiment_info.loss, self.hyper_params,
                                  self.experiment_info.device)
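
hard_update is not shown on this page. In most RL codebases it is a plain parameter copy used to synchronize a freshly built target network with the online network; a minimal sketch, assuming the first argument is the source (which the call above implies, since target_network is newly constructed):

def hard_update(source, target):
    # Copy every parameter from source into target in one shot
    target.load_state_dict(source.state_dict())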
Example #3
    def _initialize(self):
        """initialize networks, optimizer, loss function, alpha (entropy temperature)"""
        # Set env-specific input dims and output dims for models
        self.model_cfg.critic.params.model_cfg.state_dim = (
            self.model_cfg.actor.params.model_cfg.state_dim
        ) = self.experiment_info.env.state_dim
        self.model_cfg.critic.params.model_cfg.action_dim = (
            self.model_cfg.actor.params.model_cfg.action_dim
        ) = self.experiment_info.env.action_dim

        # Initialize critic models, optimizers, and loss function
        self.critic1 = build_model(self.model_cfg.critic, self.device)
        self.target_critic1 = build_model(self.model_cfg.critic, self.device)
        self.critic2 = build_model(self.model_cfg.critic, self.device)
        self.target_critic2 = build_model(self.model_cfg.critic, self.device)
        self.critic1_optimizer = optim.Adam(
            self.critic1.parameters(), lr=self.hyper_params.critic_learning_rate
        )
        self.critic2_optimizer = optim.Adam(
            self.critic2.parameters(), lr=self.hyper_params.critic_learning_rate
        )
        self.critic_loss_fn = build_loss(
            self.experiment_info.critic_loss,
            self.hyper_params,
            self.experiment_info.device,
        )

        hard_update(self.critic1, self.target_critic1)
        hard_update(self.critic2, self.target_critic2)

        # Initialize actor model, optimizer, and loss function
        self.actor = build_model(self.model_cfg.actor, self.device)
        self.actor_optimizer = optim.Adam(
            self.actor.parameters(), lr=self.hyper_params.actor_learning_rate
        )
        self.actor_loss_fn = build_loss(
            self.experiment_info.actor_loss,
            self.hyper_params,
            self.experiment_info.device,
        )

        # Entropy temperature (alpha), tuned automatically in log space
        self.alpha = self.hyper_params.alpha
        # Target entropy defaults to -|A|; action_dim is wrapped in a list so
        # torch.Tensor treats it as data rather than as a tensor size
        self.target_entropy = -torch.prod(
            torch.Tensor([self.experiment_info.env.action_dim]).to(self.device)
        ).item()
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha_optim = optim.Adam(
            [self.log_alpha], lr=self.hyper_params.alpha_learning_rate
        )
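
The final block is the automatic entropy tuning from SAC: alpha is optimized in log space, and the target entropy is set to -|A| for an |A|-dimensional action space. The corresponding temperature update is not part of this snippet; a standard-SAC sketch, assuming log_prob holds the log-probabilities of actions sampled from the actor:

alpha_loss = -(self.log_alpha * (log_prob + self.target_entropy).detach()).mean()
self.alpha_optim.zero_grad()
alpha_loss.backward()
self.alpha_optim.step()
self.alpha = self.log_alpha.exp().item()  # recover alpha from log space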
Example #4
    def _initialize(self):
        """initialize networks, optimizer, loss function"""
        self.network = build_model(self.model_cfg, self.use_cuda)
        self.target_network = build_model(self.model_cfg, self.use_cuda)
        hard_update(self.network, self.target_network)

        self.optimizer = optim.Adam(
            self.network.parameters(),
            lr=self.hyper_params.learning_rate,
            weight_decay=self.hyper_params.weight_decay,
            eps=self.hyper_params.adam_eps,
        )

        self.loss_fn = build_loss(self.experiment_info.loss, self.hyper_params,
                                  self.use_cuda)
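
This variant forwards extra Adam arguments (weight_decay, adam_eps) from hyper_params. A minimal sketch of such a config built with OmegaConf; the values are illustrative, not taken from the source:

from omegaconf import OmegaConf

hyper_params = OmegaConf.create({
    "learning_rate": 1e-4,  # illustrative value
    "weight_decay": 0.0,    # illustrative value
    "adam_eps": 1e-8,       # illustrative value
})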
Example #5
    def __init__(
        self,
        worker: TrajectoryRolloutWorker,
        hyper_params: DictConfig,
        model_cfg: DictConfig,
    ):
        self.worker = worker
        self.hyper_params = hyper_params
        self.model_cfg = model_cfg

        # Build critic
        self.critic = build_model(self.model_cfg.critic, self.worker.device)

        # Build loss functions
        self.critic_loss_fn = build_loss(
            self.worker.experiment_info.critic_loss,
            self.hyper_params,
            self.worker.experiment_info.device,
        )

        self.actor_loss_fn = build_loss(
            self.worker.experiment_info.actor_loss,
            self.hyper_params,
            self.worker.experiment_info.device,
        )
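
Note that this constructor builds only the critic and the two loss functions: the actor and the device both come from the TrajectoryRolloutWorker, which presumably constructs the policy itself (compare Example #6, where the worker builds self.actor).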
Example #6
    def __init__(self, rank: int, experiment_info: DictConfig,
                 policy_cfg: DictConfig):
        self.experiment_info = experiment_info

        self.rank = rank
        self.env = build_env(experiment_info)
        self.action_selector = build_action_selector(self.experiment_info)
        self.device = torch.device(self.experiment_info.worker_device)
        self.actor = build_model(policy_cfg, self.device)
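
With the env, action selector, and actor in place, a single worker-side forward pass would mirror the test input in Example #7. An illustrative sketch; the variable names and the .to(self.device) transfer are assumptions, not from the source:

state = self.env.reset()
state_t = torch.FloatTensor(state).unsqueeze(0).to(self.device)
policy_output = self.actor.forward(state_t)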
Example #7
def main(cfg: DictConfig):
    # print all configs
    print(cfg.pretty())

    # build env
    print("===INITIALIZING ENV===")
    env = build_env(cfg.experiment_info)
    print(env.reset())
    print("=================")

    # build model
    print("===INITIALIZING MODEL===")
    cfg.model.params.model_cfg.state_dim = env.observation_space.shape
    cfg.model.params.model_cfg.action_dim = env.action_space.n
    cfg.model.params.model_cfg.fc.output.params.output_size = env.action_space.n
    model = build_model(cfg.model)
    test_input = torch.FloatTensor(env.reset()).unsqueeze(0)
    print(model)
    print(model.forward(test_input))
    print("===================")

    # build action_selector
    print("===INITIALIZING ACTION SELECTOR===")
    action_selector = build_action_selector(cfg.experiment_info)
    print(action_selector)
    print("==============================")

    # build loss
    print("===INITIALIZING LOSS===")
    loss = build_loss(cfg.experiment_info)
    print(loss)
    print("==================")

    # build learner
    print("===INITIALIZING LEARNER===")
    learner = build_learner(**cfg)
    print(learner)
    print("=====================")

    # build agent
    print("===INITIALIZING AGENT===")
    agent = build_agent(**cfg)
    print(agent)
    print("=====================")