Code example #1
    def test_get_model(self):
        """
        test getting policy, value and AC models
        """
        ac = get_model("ac", "mlp")
        p = get_model("p", "mlp")
        v = get_model("v", "mlp")
        v_ = get_model("v", "cnn")

        assert ac == MlpActorCritic
        assert p == MlpPolicy
        assert v == MlpValue
        assert v_ == CnnValue
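
The test above fixes the mapping that get_model must return for each (type, backbone) pair. Below is a minimal registry-style sketch consistent with that mapping; the placeholder classes and the dictionary lookup are illustrative assumptions, not genrl's actual implementation.

# Placeholder classes standing in for genrl's network classes referenced in the test.
class MlpActorCritic: ...
class MlpPolicy: ...
class MlpValue: ...
class CnnValue: ...

def get_model(type_: str, name: str):
    """Return the network class for a model type ("ac", "p", "v") and a backbone name."""
    registry = {
        ("ac", "mlp"): MlpActorCritic,  # combined actor-critic, MLP backbone
        ("p", "mlp"): MlpPolicy,        # policy-only network
        ("v", "mlp"): MlpValue,         # value network, MLP backbone
        ("v", "cnn"): CnnValue,         # value network, CNN backbone
    }
    try:
        return registry[(type_, name)]
    except KeyError:
        raise NotImplementedError(f"No model registered for ({type_!r}, {name!r})")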
Code example #2
File: ppo1.py  Project: hades-rp2010/genrl
    def _create_model(self):
        """Function to initialize Actor-Critic architecture

        This will create the Actor-Critic net for the agent and initialise the action noise
        """
        # Instantiate networks and optimizers
        state_dim, action_dim, discrete, action_lim = get_env_properties(
            self.env, self.network)
        if isinstance(self.network, str):
            arch = self.network
            if self.shared_layers is not None:
                arch += "s"
            self.ac = get_model("ac", arch)(
                state_dim,
                action_dim,
                shared_layers=self.shared_layers,
                policy_layers=self.policy_layers,
                value_layers=self.value_layers,
                val_type="V",
                discrete=discrete,
                action_lim=action_lim,
                activation=self.activation,
            ).to(self.device)
        else:
            self.ac = self.network.to(self.device)

        actor_params, critic_params = self.ac.get_params()
        self.optimizer_policy = opt.Adam(actor_params, lr=self.lr_policy)
        self.optimizer_value = opt.Adam(critic_params, lr=self.lr_value)
Code example #3
File: a2c.py  Project: aiexplorations/genrl
    def _create_model(self) -> None:
        """Function to initialize Actor-Critic architecture

        This will create the Actor-Critic net for the agent and initialise the action noise
        """
        state_dim, action_dim, discrete, action_lim = get_env_properties(
            self.env, self.network)
        if isinstance(self.network, str):
            self.ac = get_model("ac", self.network)(
                state_dim,
                action_dim,
                policy_layers=self.policy_layers,
                value_layers=self.value_layers,
                val_type="V",
                discrete=discrete,
                action_lim=action_lim,
            ).to(self.device)
        else:
            self.ac = self.network.to(self.device)

            # action_dim = self.network.action_dim

        if self.noise is not None:
            self.noise = self.noise(np.zeros_like(action_dim),
                                    self.noise_std * np.ones_like(action_dim))

        self.optimizer_policy = opt.Adam(self.ac.actor.parameters(),
                                         lr=self.lr_policy)
        self.optimizer_value = opt.Adam(self.ac.critic.parameters(),
                                        lr=self.lr_value)
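
In the A2C and DDPG examples, self.noise starts out as a class and is then replaced by an instance built from a mean array and a standard-deviation array. A minimal Gaussian noise class compatible with that call pattern is sketched below; the name NormalActionNoise and the __call__ interface are assumptions for illustration, not necessarily genrl's own noise API.

import numpy as np

class NormalActionNoise:
    """Gaussian exploration noise built from per-dimension mean and std arrays."""

    def __init__(self, mean: np.ndarray, std: np.ndarray):
        self.mean = mean
        self.std = std

    def __call__(self) -> np.ndarray:
        # One noise sample per call, added to the deterministic action.
        return np.random.normal(self.mean, self.std)

# Hypothetical usage mirroring _create_model above.
action_dim = 2
noise = NormalActionNoise(np.zeros(action_dim), 0.1 * np.ones(action_dim))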
Code example #4
File: ddpg.py  Project: aiexplorations/genrl
    def _create_model(self) -> None:
        """Function to initialize Actor-Critic architecture

        This will create the Actor-Critic net for the agent and initialise the action noise
        """
        state_dim, action_dim, discrete, _ = get_env_properties(
            self.env, self.network)
        if discrete:
            raise Exception(
                "Discrete Environments not supported for {}.".format(
                    __class__.__name__))
        if self.noise is not None:
            self.noise = self.noise(np.zeros_like(action_dim),
                                    self.noise_std * np.ones_like(action_dim))

        if isinstance(self.network, str):
            self.ac = get_model("ac", self.network)(
                state_dim,
                action_dim,
                self.policy_layers,
                self.value_layers,
                "Qsa",
                False,
            ).to(self.device)
        else:
            self.ac = self.network

        self.ac_target = deepcopy(self.ac).to(self.device)

        self.optimizer_policy = opt.Adam(self.ac.actor.parameters(),
                                         lr=self.lr_policy)
        self.optimizer_value = opt.Adam(self.ac.critic.parameters(),
                                        lr=self.lr_value)
Code example #5
File: ppo1.py  Project: threewisemonkeys-as/genrl
    def _create_model(self):
        """Function to initialize Actor-Critic architecture

        This will create the Actor-Critic net for the agent and initialise the action noise
        """
        # Instantiate networks and optimizers
        state_dim, action_dim, discrete, action_lim = get_env_properties(
            self.env, self.network)
        if isinstance(self.network, str):
            self.ac = get_model("ac", self.network)(
                state_dim,
                action_dim,
                policy_layers=self.policy_layers,
                value_layers=self.value_layers,
                val_type="V",
                discrete=discrete,
                action_lim=action_lim,
                activation=self.activation,
            ).to(self.device)
        else:
            self.ac = self.network.to(self.device)

        self.optimizer_policy = opt.Adam(self.ac.actor.parameters(),
                                         lr=self.lr_policy)
        self.optimizer_value = opt.Adam(self.ac.critic.parameters(),
                                        lr=self.lr_value)
Code example #6
File: sac.py  Project: threewisemonkeys-as/genrl
    def _create_model(self, **kwargs) -> None:
        """Initializes class objects

        Initializes actor-critic architecture, replay buffer and optimizers
        """
        if self.env.action_space is None:
            self.action_scale = torch.FloatTensor([1.0])
            self.action_bias = torch.FloatTensor([0.0])
        else:
            self.action_scale = torch.FloatTensor(
                (self.env.action_space.high - self.env.action_space.low) / 2.0
            )
            self.action_bias = torch.FloatTensor(
                (self.env.action_space.high + self.env.action_space.low) / 2.0
            )

        if isinstance(self.network, str):
            state_dim, action_dim, discrete, _ = get_env_properties(
                self.env, self.network
            )

            self.ac = get_model("ac", self.network + "12")(
                state_dim,
                action_dim,
                policy_layers=self.policy_layers,
                value_layers=self.value_layers,
                val_type="Qsa",
                discrete=False,
                sac=True,
                action_scale=self.action_scale,
                action_bias=self.action_bias,
            )
        else:
            self.ac = self.network

        self.ac_target = deepcopy(self.ac)

        self.critic_params = list(self.ac.critic1.parameters()) + list(
            self.ac.critic2.parameters()
        )

        self.optimizer_value = opt.Adam(self.critic_params, self.lr_value)
        self.optimizer_policy = opt.Adam(self.ac.actor.parameters(), self.lr_policy)

        if self.entropy_tuning:
            self.target_entropy = -torch.prod(
                torch.Tensor(self.env.action_space.shape)
            ).item()
            self.log_alpha = torch.zeros(1, requires_grad=True)
            self.optimizer_alpha = opt.Adam([self.log_alpha], lr=self.lr_policy)
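
The entropy_tuning branch above learns the SAC temperature through log_alpha. A hedged sketch of the corresponding temperature loss, as commonly written in SAC implementations (the exact loss genrl uses may differ), is:

import torch

def alpha_loss(log_alpha: torch.Tensor, log_prob: torch.Tensor,
               target_entropy: float) -> torch.Tensor:
    """Temperature loss: push the policy's entropy toward target_entropy."""
    # log_prob holds log-probabilities of actions sampled from the current policy.
    return -(log_alpha * (log_prob + target_entropy).detach()).mean()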
Code example #7
    def _create_model(self):
        """Initialize policy network"""
        state_dim, action_dim, discrete, action_lim = get_env_properties(
            self.env, self.network)
        if isinstance(self.network, str):
            # Instantiate networks and optimizers
            self.actor = get_model("p", self.network)(
                state_dim,
                action_dim,
                self.policy_layers,
                "V",
                discrete,
                action_lim=action_lim,
            ).to(self.device)
        else:
            self.actor = self.network.to(self.device)

        self.optimizer_policy = opt.Adam(self.actor.parameters(),
                                         lr=self.lr_policy)
Code example #8
File: base.py  Project: veds12/genrl
    def _create_model(self, *args, **kwargs) -> None:
        """Function to initialize Q-value model

        This will create the Q-value function of the agent.
        """
        state_dim, action_dim, discrete, _ = get_env_properties(
            self.env, self.network)
        if not discrete:
            raise Exception("Only Discrete Environments are supported for DQN")

        if isinstance(self.network, str):
            self.model = get_model("v", self.network + self.dqn_type)(
                state_dim, action_dim, "Qs", self.value_layers, **kwargs)
        else:
            self.model = self.network

        self.target_model = deepcopy(self.model)

        self.optimizer = opt.Adam(self.model.parameters(), lr=self.lr_value)
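
Several of these snippets keep a deep-copied target network (target_model, ac_target). One conventional way to keep such a copy in sync is a Polyak (soft) update; the helper below is a generic sketch under that assumption, not genrl's own update routine.

import torch
import torch.nn as nn

def soft_update(target: nn.Module, source: nn.Module, polyak: float = 0.995) -> None:
    """Polyak-average the source parameters into the target network in place."""
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.mul_(polyak).add_((1.0 - polyak) * s_param)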
Code example #9
File: td3.py  Project: tzadouri/genrl
    def _create_model(self) -> None:
        """Initializes class objects

        Initializes actor-critic architecture, replay buffer and optimizers
        """
        state_dim, action_dim, discrete, _ = get_env_properties(
            self.env, self.network)
        if discrete:
            raise Exception(
                "Discrete Environments not supported for {}.".format(
                    __class__.__name__))

        if isinstance(self.network, str):
            arch = self.network + "12"
            if self.shared_layers is not None:
                arch += "s"
            self.ac = get_model("ac", arch)(
                state_dim,
                action_dim,
                shared_layers=self.shared_layers,
                policy_layers=self.policy_layers,
                value_layers=self.value_layers,
                val_type="Qsa",
                discrete=False,
            )
        else:
            self.ac = self.network

        if self.noise is not None:
            self.noise = self.noise(torch.zeros(action_dim),
                                    self.noise_std * torch.ones(action_dim))

        self.ac_target = deepcopy(self.ac)
        actor_params, critic_params = self.ac.get_params()
        self.optimizer_value = torch.optim.Adam(critic_params,
                                                lr=self.lr_value)
        self.optimizer_policy = torch.optim.Adam(actor_params,
                                                 lr=self.lr_policy)
Code example #10
    def _create_model(self) -> None:
        """Initializes class objects

        Initializes actor-critic architecture, replay buffer and optimizers
        """
        state_dim, action_dim, discrete, _ = get_env_properties(self.env, self.network)
        if discrete:
            raise Exception(
                "Discrete Environments not supported for {}.".format(__class__.__name__)
            )

        if isinstance(self.network, str):
            # Below, the "12" corresponds to the Single Actor, Double Critic network architecture
            self.ac = get_model("ac", self.network + "12")(
                state_dim,
                action_dim,
                policy_layers=self.policy_layers,
                value_layers=self.value_layers,
                val_type="Qsa",
                discrete=False,
            )
        else:
            self.ac = self.network

        if self.noise is not None:
            self.noise = self.noise(
                np.zeros_like(action_dim), self.noise_std * np.ones_like(action_dim)
            )

        self.ac_target = deepcopy(self.ac)

        self.critic_params = list(self.ac.critic1.parameters()) + list(
            self.ac.critic2.parameters()
        )
        self.optimizer_value = torch.optim.Adam(self.critic_params, lr=self.lr_value)
        self.optimizer_policy = torch.optim.Adam(
            self.ac.actor.parameters(), lr=self.lr_policy
        )
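
The comment in this example notes that the "12" suffix requests a single actor with two critics. The reason for the second critic in TD3-style agents is the clipped double-Q target: the bootstrap uses the smaller of the two Q estimates to curb overestimation. A minimal sketch of that target follows; the tensor names and shapes are illustrative assumptions only.

import torch

def clipped_double_q_target(q1: torch.Tensor, q2: torch.Tensor,
                            reward: torch.Tensor, done: torch.Tensor,
                            gamma: float = 0.99) -> torch.Tensor:
    """Bootstrap with the pessimistic (minimum) of the two critic estimates."""
    next_q = torch.min(q1, q2)
    return reward + gamma * (1.0 - done) * next_q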