Example no. 1
            writer=writer
        )

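        # epsilon decays linearly; final_exploration_step is divided by n_envs,
        # presumably because the schedule advances once per batched parallel
        # step, which covers n_envs environment frames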
        policy = ParallelGreedyPolicy(
            q,
            self.n_actions,
            epsilon=LinearScheduler(
                self.hyperparameters['initial_exploration'],
                self.hyperparameters['final_exploration'],
                0,
                self.hyperparameters["final_exploration_step"] / self.hyperparameters["n_envs"],
                name="exploration",
                writer=writer
            )
        )

        return VQN(q, policy, discount_factor=self.hyperparameters['discount_factor'])

    def test_agent(self):
        q = QNetwork(copy.deepcopy(self.model))
        policy = GreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters["test_exploration"])
        return VQNTestAgent(policy)

    def parallel_test_agent(self):
        q = QNetwork(copy.deepcopy(self.model))
        policy = ParallelGreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters["test_exploration"])
        return VQNTestAgent(policy)


vqn = ParallelPresetBuilder('vqn', default_hyperparameters, VQNClassicControlPreset)
Example no. 2
            PPO(
                features,
                v,
                policy,
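                # the PPO clipping parameter is itself annealed linearly from
                # clip_initial to clip_final over the planned number of updates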
                epsilon=LinearScheduler(
                    self.hyperparameters["clip_initial"],
                    self.hyperparameters["clip_final"],
                    0,
                    n_updates,
                    name='clip',
                    writer=writer
                ),
                epochs=self.hyperparameters["epochs"],
                minibatches=self.hyperparameters["minibatches"],
                n_envs=self.hyperparameters["n_envs"],
                n_steps=self.hyperparameters["n_steps"],
                discount_factor=self.hyperparameters["discount_factor"],
                lam=self.hyperparameters["lam"],
                entropy_loss_scaling=self.hyperparameters["entropy_loss_scaling"],
                writer=writer,
            )
        )

    def test_agent(self):
        features = FeatureNetwork(copy.deepcopy(self.feature_model))
        policy = SoftmaxPolicy(copy.deepcopy(self.policy_model))
        return DeepmindAtariBody(PPOTestAgent(features, policy))


ppo = ParallelPresetBuilder('ppo', default_hyperparameters, PPOAtariPreset)
Example no. 3
                                  clip_grad=self.hyperparameters["clip_grad"],
                                  writer=writer)

        v = VNetwork(self.value_model,
                     value_optimizer,
                     loss_scaling=self.hyperparameters["value_loss_scaling"],
                     clip_grad=self.hyperparameters["clip_grad"],
                     writer=writer)

        policy = SoftmaxPolicy(self.policy_model,
                               policy_optimizer,
                               clip_grad=self.hyperparameters["clip_grad"],
                               writer=writer)

        return VAC(features,
                   v,
                   policy,
                   discount_factor=self.hyperparameters["discount_factor"])

    def test_agent(self):
        features = FeatureNetwork(copy.deepcopy(self.feature_model))
        policy = SoftmaxPolicy(copy.deepcopy(self.policy_model))
        return VACTestAgent(features, policy)

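    # evaluation apparently does not depend on vectorization, so the parallel
    # test agent simply reuses the single-environment test agent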
    def parallel_test_agent(self):
        return self.test_agent()


vac = ParallelPresetBuilder('vac', default_hyperparameters,
                            VACClassicControlPreset)
Example no. 4
            v,
            policy,
            epsilon=LinearScheduler(
                self.hyperparameters["clip_initial"],
                self.hyperparameters["clip_final"],
                0,
                n_updates,
                name='clip',
                writer=writer
            ),
            epochs=self.hyperparameters["epochs"],
            minibatches=self.hyperparameters["minibatches"],
            n_envs=self.hyperparameters["n_envs"],
            n_steps=self.hyperparameters["n_steps"],
            discount_factor=self.hyperparameters["discount_factor"],
            lam=self.hyperparameters["lam"],
            entropy_loss_scaling=self.hyperparameters["entropy_loss_scaling"],
            writer=writer,
        )

    def test_agent(self):
        features = FeatureNetwork(copy.deepcopy(self.feature_model))
        policy = SoftmaxPolicy(copy.deepcopy(self.policy_model))
        return PPOTestAgent(features, policy)

    def parallel_test_agent(self):
        return self.test_agent()


ppo = ParallelPresetBuilder('ppo', default_hyperparameters, PPOClassicControlPreset)
Example no. 5
                     scheduler=CosineAnnealingLR(value_optimizer, n_updates),
                     loss_scaling=self.hyperparameters["value_loss_scaling"],
                     clip_grad=self.hyperparameters["clip_grad"],
                     writer=writer)

        policy = SoftmaxPolicy(self.policy_model,
                               policy_optimizer,
                               scheduler=CosineAnnealingLR(
                                   policy_optimizer, n_updates),
                               clip_grad=self.hyperparameters["clip_grad"],
                               writer=writer)

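        # DeepmindAtariBody wraps the agent with DeepMind-style Atari
        # preprocessing (e.g. frame stacking and reward clipping)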
        return DeepmindAtariBody(
            A2C(features,
                v,
                policy,
                n_envs=self.hyperparameters["n_envs"],
                n_steps=self.hyperparameters["n_steps"],
                discount_factor=self.hyperparameters["discount_factor"],
                entropy_loss_scaling=self.hyperparameters["entropy_loss_scaling"],
                writer=writer),
        )

    def test_agent(self):
        features = FeatureNetwork(copy.deepcopy(self.feature_model))
        policy = SoftmaxPolicy(copy.deepcopy(self.policy_model))
        return DeepmindAtariBody(A2CTestAgent(features, policy))


a2c = ParallelPresetBuilder('a2c', default_hyperparameters, A2CAtariPreset)
Example no. 6
        )

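        # the learning rate follows a cosine annealing schedule over n_updates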
        q = QNetwork(
            self.model,
            optimizer,
            scheduler=CosineAnnealingLR(optimizer, n_updates),
            writer=writer
        )

        policy = ParallelGreedyPolicy(
            q,
            self.n_actions,
            epsilon=LinearScheduler(
                self.hyperparameters['initial_exploration'],
                self.hyperparameters['final_exploration'],
                0,
                self.hyperparameters["final_exploration_step"] / self.hyperparameters["n_envs"],
                name="exploration",
                writer=writer
            )
        )

        return VSarsa(q, policy, discount_factor=self.hyperparameters['discount_factor'])

    def test_agent(self):
        q = QNetwork(copy.deepcopy(self.model))
        return VSarsaTestAgent(q, self.n_actions, exploration=self.hyperparameters['test_exploration'])


vsarsa = ParallelPresetBuilder('vsarsa', default_hyperparameters, VSarsaClassicControlPreset)
Example no. 7
        v = VNetwork(self.value_model,
                     value_optimizer,
                     scheduler=CosineAnnealingLR(value_optimizer, n_updates),
                     loss_scaling=self.hyperparameters["value_loss_scaling"],
                     clip_grad=self.hyperparameters["clip_grad"],
                     writer=writer)

        policy = SoftmaxPolicy(self.policy_model,
                               policy_optimizer,
                               scheduler=CosineAnnealingLR(
                                   policy_optimizer, n_updates),
                               clip_grad=self.hyperparameters["clip_grad"],
                               writer=writer)

        return DeepmindAtariBody(
            VAC(features,
                v,
                policy,
                discount_factor=self.hyperparameters["discount_factor"]),
        )

    def test_agent(self):
        features = FeatureNetwork(copy.deepcopy(self.feature_model))
        policy = SoftmaxPolicy(copy.deepcopy(self.policy_model))
        return DeepmindAtariBody(VACTestAgent(features, policy))

    def parallel_test_agent(self):
        return self.test_agent()


vac = ParallelPresetBuilder('vac', default_hyperparameters, VACAtariPreset)
Example no. 8
                     optimizer,
                     scheduler=CosineAnnealingLR(optimizer, n_updates),
                     writer=writer)

        policy = ParallelGreedyPolicy(
            q,
            self.n_actions,
            epsilon=LinearScheduler(
                self.hyperparameters['initial_exploration'],
                self.hyperparameters['final_exploration'],
                0,
                self.hyperparameters["final_exploration_step"] /
                self.hyperparameters["n_envs"],
                name="exploration",
                writer=writer))

        return DeepmindAtariBody(
            VQN(q,
                policy,
                discount_factor=self.hyperparameters['discount_factor']),
        )

    def test_agent(self):
        q = QNetwork(copy.deepcopy(self.model))
        return DeepmindAtariBody(
            VQNTestAgent(q,
                         self.n_actions,
                         exploration=self.hyperparameters['test_exploration']))


vqn = ParallelPresetBuilder('vqn', default_hyperparameters, VQNAtariPreset)
Example no. 9
                                  clip_grad=self.hyperparameters["clip_grad"])

        v = VNetwork(self.value_model,
                     value_optimizer,
                     clip_grad=self.hyperparameters["clip_grad"],
                     writer=writer)

        policy = SoftmaxPolicy(self.policy_model,
                               policy_optimizer,
                               clip_grad=self.hyperparameters["clip_grad"],
                               writer=writer)

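        # assemble the advantage actor-critic agent from the shared feature
        # network, the value head, and the softmax policy head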
        return A2C(
            features,
            v,
            policy,
            n_envs=self.hyperparameters["n_envs"],
            n_steps=self.hyperparameters["n_steps"],
            discount_factor=self.hyperparameters["discount_factor"],
            entropy_loss_scaling=self.hyperparameters["entropy_loss_scaling"],
            writer=writer)

    def test_agent(self):
        features = FeatureNetwork(copy.deepcopy(self.feature_model))
        policy = SoftmaxPolicy(copy.deepcopy(self.policy_model))
        return A2CTestAgent(features, policy)


a2c = ParallelPresetBuilder('a2c', default_hyperparameters,
                            A2CClassicControlPreset)
Example no. 10
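        # TimeFeature is an agent body that appears to append an elapsed-time
        # feature to each observation, which helps with time-limited
        # continuous-control tasks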
        return TimeFeature(
            PPO(
                features,
                v,
                policy,
                epsilon=LinearScheduler(self.hyperparameters['clip_initial'],
                                        self.hyperparameters['clip_final'],
                                        0,
                                        n_updates,
                                        name='clip',
                                        writer=writer),
                epochs=self.hyperparameters['epochs'],
                minibatches=self.hyperparameters['minibatches'],
                n_envs=self.hyperparameters['n_envs'],
                n_steps=self.hyperparameters['n_steps'],
                discount_factor=self.hyperparameters['discount_factor'],
                lam=self.hyperparameters['lam'],
                entropy_loss_scaling=self.hyperparameters['entropy_loss_scaling'],
                writer=writer,
            ))

    def test_agent(self):
        policy = GaussianPolicy(copy.deepcopy(self.policy_model),
                                space=self.action_space)
        return TimeFeature(PPOTestAgent(Identity(self.device), policy))


ppo = ParallelPresetBuilder('ppo', default_hyperparameters,
                            PPOContinuousPreset)
Example no. 11
            optimizer,
            scheduler=CosineAnnealingLR(optimizer, n_updates),
            writer=writer
        )

        policy = ParallelGreedyPolicy(
            q,
            self.n_actions,
            epsilon=LinearScheduler(
                self.hyperparameters['initial_exploration'],
                self.hyperparameters['final_exploration'],
                0,
                self.hyperparameters["final_exploration_step"] / self.hyperparameters["n_envs"],
                name="exploration",
                writer=writer
            )
        )

        return DeepmindAtariBody(
            VSarsa(q, policy, discount_factor=self.hyperparameters['discount_factor']),
        )

    def test_agent(self):
        q = QNetwork(copy.deepcopy(self.model))
        return DeepmindAtariBody(
            VSarsaTestAgent(q, self.n_actions, exploration=self.hyperparameters['test_exploration'])
        )


vsarsa = ParallelPresetBuilder('vsarsa', default_hyperparameters, VSarsaAtariPreset)
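
For reference, builders like vsarsa above are normally handed to an experiment runner rather than used directly. The following is a minimal, hypothetical usage sketch assuming the autonomous-learning-library's GymEnvironment and run_experiment helpers; exact names and signatures may differ across library versions.

from all.environments import GymEnvironment
from all.experiments import run_experiment

# Train the default vsarsa preset on CartPole for 100,000 frames;
# the runner builds the preset for the environment, trains it, and logs results.
run_experiment(
    [vsarsa],
    [GymEnvironment('CartPole-v0', device='cpu')],
    100_000,
)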