Code Example #1: C51 Atari preset (C51AtariPreset)
                    self.hyperparameters['initial_exploration'],
                    self.hyperparameters['final_exploration'],
                    0,
                    self.hyperparameters["final_exploration_step"] - self.hyperparameters["replay_start_size"],
                    name="epsilon",
                    writer=writer,
                ),
                discount_factor=self.hyperparameters["discount_factor"],
                minibatch_size=self.hyperparameters["minibatch_size"],
                replay_start_size=self.hyperparameters["replay_start_size"],
                update_frequency=self.hyperparameters["update_frequency"],
                writer=writer
            ),
            lazy_frames=True,
            episodic_lives=True
        )

    def test_agent(self):
        q_dist = QDist(
            copy.deepcopy(self.model),
            None,
            self.n_actions,
            self.hyperparameters['atoms'],
            v_min=self.hyperparameters['v_min'],
            v_max=self.hyperparameters['v_max'],
        )
        return DeepmindAtariBody(C51TestAgent(q_dist, self.n_actions, self.hyperparameters["test_exploration"]))


c51 = PresetBuilder('c51', default_hyperparameters, C51AtariPreset)
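
The atoms, v_min, and v_max hyperparameters passed to QDist define the fixed support of C51's categorical return distribution. Below is a minimal NumPy sketch of that idea with a fabricated probability tensor; it is illustrative only and not the library's QDist implementation.

import numpy as np

# Fixed support: `atoms` evenly spaced return values between v_min and v_max.
v_min, v_max, atoms = -10.0, 10.0, 51
support = np.linspace(v_min, v_max, atoms)

# The network would output per-action probabilities over the atoms;
# here we fabricate them for a single state with 4 actions.
rng = np.random.default_rng(0)
logits = rng.normal(size=(4, atoms))
probs = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)

# The greedy action maximizes the expected return under each distribution.
q_values = (probs * support).sum(axis=-1)
greedy_action = int(np.argmax(q_values))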
Code Example #2: VPG classic control preset (VPGClassicControlPreset)
                                  clip_grad=self.hyperparameters["clip_grad"],
                                  writer=writer)

        v = VNetwork(self.value_model,
                     value_optimizer,
                     loss_scaling=self.hyperparameters["value_loss_scaling"],
                     clip_grad=self.hyperparameters["clip_grad"],
                     writer=writer)

        policy = SoftmaxPolicy(self.policy_model,
                               policy_optimizer,
                               clip_grad=self.hyperparameters["clip_grad"],
                               writer=writer)

        return VPG(features,
                   v,
                   policy,
                   discount_factor=self.hyperparameters["discount_factor"],
                   min_batch_size=self.hyperparameters["min_batch_size"])

    def test_agent(self):
        features = FeatureNetwork(copy.deepcopy(self.feature_model))
        policy = SoftmaxPolicy(copy.deepcopy(self.policy_model))
        return VPGTestAgent(features, policy)

    def parallel_test_agent(self):
        return self.test_agent()


vpg = PresetBuilder('vpg', default_hyperparameters, VPGClassicControlPreset)
Code Example #3: DQN classic control preset (DQNClassicControlPreset)
                self.hyperparameters['initial_exploration'],
                self.hyperparameters['final_exploration'],
                self.hyperparameters['replay_start_size'],
                self.hyperparameters['final_exploration_step'] -
                self.hyperparameters['replay_start_size'],
                name="exploration",
                writer=writer))

        replay_buffer = ExperienceReplayBuffer(
            self.hyperparameters['replay_buffer_size'], device=self.device)

        return DQN(
            q,
            policy,
            replay_buffer,
            discount_factor=self.hyperparameters['discount_factor'],
            minibatch_size=self.hyperparameters['minibatch_size'],
            replay_start_size=self.hyperparameters['replay_start_size'],
            update_frequency=self.hyperparameters['update_frequency'],
        )

    def test_agent(self):
        q = QNetwork(copy.deepcopy(self.model))
        policy = GreedyPolicy(q,
                              self.n_actions,
                              epsilon=self.hyperparameters['test_exploration'])
        return DQNTestAgent(policy)


dqn = PresetBuilder('dqn', default_hyperparameters, DQNClassicControlPreset)
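
The LinearScheduler arguments that appear throughout these presets (initial value, final value, decay start, decay length) amount to a clamped linear interpolation of epsilon over training steps. A self-contained sketch of that schedule with made-up numbers, not the library's LinearScheduler class:

def linear_epsilon(step, initial=1.0, final=0.02, decay_start=0, decay_steps=10000):
    # Hold `initial` until decay_start, then anneal linearly and clamp at `final`.
    if step < decay_start:
        return initial
    fraction = min((step - decay_start) / decay_steps, 1.0)
    return initial + fraction * (final - initial)

# 1.0 at the start, roughly halfway down mid-decay, clamped to 0.02 afterwards.
print(linear_epsilon(0), linear_epsilon(5000), linear_epsilon(50000))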
Code Example #4: SAC continuous control preset (SACContinuousPreset)
                                         writer=writer)

        replay_buffer = ExperienceReplayBuffer(
            self.hyperparameters["replay_buffer_size"], device=self.device)

        return TimeFeature(
            SAC(policy,
                q_1,
                q_2,
                v,
                replay_buffer,
                temperature_initial=self.hyperparameters["temperature_initial"],
                entropy_target=(
                    -self.action_space.shape[0] *
                    self.hyperparameters["entropy_target_scaling"]),
                lr_temperature=self.hyperparameters["lr_temperature"],
                replay_start_size=self.hyperparameters["replay_start_size"],
                discount_factor=self.hyperparameters["discount_factor"],
                update_frequency=self.hyperparameters["update_frequency"],
                minibatch_size=self.hyperparameters["minibatch_size"],
                writer=writer))

    def test_agent(self):
        policy = SoftDeterministicPolicy(copy.deepcopy(self.policy_model),
                                         space=self.action_space)
        return TimeFeature(SACTestAgent(policy))


sac = PresetBuilder('sac', default_hyperparameters, SACContinuousPreset)
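
The entropy_target above uses the common SAC heuristic of -dim(action_space) scaled by a hyperparameter, and lr_temperature controls how quickly the temperature is adjusted toward it. The function below is a rough, simplified sketch of that adjustment; the real agent optimizes the temperature through a gradient-based loss, so this is not the library's implementation:

def temperature_step(temperature, policy_entropy, action_dim,
                     entropy_target_scaling=1.0, lr_temperature=1e-3):
    # Heuristic target, matching the preset: -|A| * scaling.
    entropy_target = -action_dim * entropy_target_scaling
    # If the policy's entropy falls below the target, raise the temperature
    # (stronger entropy bonus); if it exceeds the target, lower it.
    temperature += lr_temperature * (entropy_target - policy_entropy)
    return max(temperature, 0.0)

# Example: 3-D action space, current policy entropy -4.0, target -3.0.
print(temperature_step(0.2, policy_entropy=-4.0, action_dim=3))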
Code Example #5: DDQN Atari preset (DDQNAtariPreset)
                name="exploration",
                writer=writer))

        replay_buffer = PrioritizedReplayBuffer(
            self.hyperparameters['replay_buffer_size'],
            alpha=self.hyperparameters['alpha'],
            beta=self.hyperparameters['beta'],
            device=self.device)

        return DeepmindAtariBody(
            DDQN(
                q,
                policy,
                replay_buffer,
                loss=weighted_smooth_l1_loss,
                discount_factor=self.hyperparameters["discount_factor"],
                minibatch_size=self.hyperparameters["minibatch_size"],
                replay_start_size=self.hyperparameters["replay_start_size"],
                update_frequency=self.hyperparameters["update_frequency"],
            ),
            lazy_frames=True,
        )

    def test_agent(self):
        q = QNetwork(copy.deepcopy(self.model))
        policy = GreedyPolicy(q,
                              self.n_actions,
                              epsilon=self.hyperparameters['test_exploration'])
        return DeepmindAtariBody(DDQNTestAgent(policy))


ddqn = PresetBuilder('ddqn', default_hyperparameters, DDQNAtariPreset)
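
The alpha and beta hyperparameters and the weighted_smooth_l1_loss correspond to prioritized experience replay: transitions are sampled with probability proportional to priority**alpha, and the loss is corrected with importance-sampling weights raised to beta. A minimal sketch of that weighting with made-up priorities, not the library's buffer:

import numpy as np

def importance_weights(priorities, alpha=0.6, beta=0.5):
    # Sampling probability: P(i) = p_i**alpha / sum_j p_j**alpha
    # Correction weight:    w_i = (N * P(i))**(-beta), normalized by max(w)
    priorities = np.asarray(priorities, dtype=float)
    probs = priorities ** alpha
    probs /= probs.sum()
    weights = (len(priorities) * probs) ** (-beta)
    return weights / weights.max()

# Transitions with larger TD error are sampled more often but down-weighted in the loss.
print(importance_weights([0.1, 1.0, 5.0]))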
Code Example #6: DDPG continuous control preset (DDPGContinuousPreset)
            target=PolyakTarget(self.hyperparameters["polyak_rate"]),
            scheduler=CosineAnnealingLR(policy_optimizer, n_updates),
            writer=writer)

        replay_buffer = ExperienceReplayBuffer(
            self.hyperparameters["replay_buffer_size"], device=self.device)

        return TimeFeature(
            DDPG(
                q,
                policy,
                replay_buffer,
                self.action_space,
                noise=self.hyperparameters["noise"],
                replay_start_size=self.hyperparameters["replay_start_size"],
                discount_factor=self.hyperparameters["discount_factor"],
                update_frequency=self.hyperparameters["update_frequency"],
                minibatch_size=self.hyperparameters["minibatch_size"],
            ))

    def test_agent(self):
        policy = DeterministicPolicy(
            copy.deepcopy(self.policy_model),
            None,
            self.action_space,
        )
        return TimeFeature(DDPGTestAgent(policy))


ddpg = PresetBuilder('ddpg', default_hyperparameters, DDPGContinuousPreset)
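
PolyakTarget(polyak_rate) maintains a target network that tracks the online network by exponential averaging rather than periodic copying. A sketch of that soft update on raw tensors, assuming the convention that polyak_rate is the fraction of the online weights mixed in per step (the library may define the rate with the opposite convention):

import torch

def polyak_update(target_params, online_params, polyak_rate=0.005):
    # Soft-update each target parameter toward the corresponding online parameter.
    with torch.no_grad():
        for target, online in zip(target_params, online_params):
            target.mul_(1.0 - polyak_rate).add_(polyak_rate * online)

# Usage (hypothetical networks): polyak_update(target_net.parameters(), online_net.parameters())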
Code Example #7: VPG Atari preset (VPGAtariPreset)
        )

        v = VNetwork(
            self.value_model,
            value_optimizer,
            scheduler=CosineAnnealingLR(value_optimizer, n_updates),
            loss_scaling=self.hyperparameters["value_loss_scaling"],
            clip_grad=self.hyperparameters["clip_grad"],
            writer=writer
        )

        policy = SoftmaxPolicy(
            self.policy_model,
            policy_optimizer,
            scheduler=CosineAnnealingLR(policy_optimizer, n_updates),
            clip_grad=self.hyperparameters["clip_grad"],
            writer=writer
        )

        return DeepmindAtariBody(
            VPG(
                features,
                v,
                policy,
                discount_factor=self.hyperparameters["discount_factor"],
                min_batch_size=self.hyperparameters["min_batch_size"],
            ),
        )

    def test_agent(self):
        features = FeatureNetwork(copy.deepcopy(self.feature_model))
        policy = SoftmaxPolicy(copy.deepcopy(self.policy_model))
        return DeepmindAtariBody(VPGTestAgent(features, policy))


vpg = PresetBuilder('vpg', default_hyperparameters, VPGAtariPreset)
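
Several of these presets attach a CosineAnnealingLR scheduler so that the learning rate decays smoothly to zero over n_updates optimizer steps. A small standalone example of that PyTorch scheduler; the model and optimizer here are placeholders:

import torch
from torch.optim.lr_scheduler import CosineAnnealingLR

model = torch.nn.Linear(4, 2)                      # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
n_updates = 1000
scheduler = CosineAnnealingLR(optimizer, n_updates)

for _ in range(n_updates):
    optimizer.step()                               # normally preceded by a backward pass
    scheduler.step()                               # cosine-decay the learning rate

print(optimizer.param_groups[0]['lr'])             # approximately 0 after n_updates steps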
Code Example #8: Rainbow classic control preset (RainbowClassicControlPreset)
            q_dist,
            replay_buffer,
            exploration=LinearScheduler(
                self.hyperparameters['initial_exploration'],
                self.hyperparameters['final_exploration'],
                0,
                train_steps - self.hyperparameters['replay_start_size'],
                name="exploration",
                writer=writer
            ),
            discount_factor=self.hyperparameters['discount_factor'] ** self.hyperparameters["n_steps"],
            minibatch_size=self.hyperparameters['minibatch_size'],
            replay_start_size=self.hyperparameters['replay_start_size'],
            update_frequency=self.hyperparameters['update_frequency'],
            writer=writer,
        )

    def test_agent(self):
        q_dist = QDist(
            copy.deepcopy(self.model),
            None,
            self.n_actions,
            self.hyperparameters['atoms'],
            v_min=self.hyperparameters['v_min'],
            v_max=self.hyperparameters['v_max'],
        )
        return RainbowTestAgent(q_dist, self.n_actions, self.hyperparameters["test_exploration"])


rainbow = PresetBuilder('rainbow', default_hyperparameters, RainbowClassicControlPreset)
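
The exponent in discount_factor ** n_steps reflects n-step bootstrapping: rewards inside the n-step window are discounted per step, and the value used for bootstrapping is discounted by gamma**n. A sketch of the target computation, illustrative only:

def n_step_target(rewards, bootstrap_value, discount_factor=0.99):
    # Sum of per-step discounted rewards over the window, plus gamma**n times
    # the bootstrapped value of the state reached n steps later.
    target = 0.0
    for k, reward in enumerate(rewards):
        target += (discount_factor ** k) * reward
    return target + (discount_factor ** len(rewards)) * bootstrap_value

# 3-step return with a bootstrapped value of 5.0 at the end of the window.
print(n_step_target([1.0, 0.0, 2.0], bootstrap_value=5.0))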
Code Example #9: Rainbow Atari preset (RainbowAtariPreset)
                self.hyperparameters['final_exploration'],
                0,
                train_steps - self.hyperparameters['replay_start_size'],
                name="exploration",
                writer=writer),
            discount_factor=self.hyperparameters['discount_factor'] ** self.hyperparameters['n_steps'],
            minibatch_size=self.hyperparameters['minibatch_size'],
            replay_start_size=self.hyperparameters['replay_start_size'],
            update_frequency=self.hyperparameters['update_frequency'],
            writer=writer,
            ),
            lazy_frames=True,
            episodic_lives=True
        )

    def test_agent(self):
        q_dist = QDist(
            copy.deepcopy(self.model),
            None,
            self.n_actions,
            self.hyperparameters['atoms'],
            v_min=self.hyperparameters['v_min'],
            v_max=self.hyperparameters['v_max'],
        )
        return DeepmindAtariBody(
            RainbowTestAgent(q_dist, self.n_actions,
                             self.hyperparameters["test_exploration"]))


rainbow = PresetBuilder('rainbow', default_hyperparameters, RainbowAtariPreset)
Code Example #10: C51 classic control preset (C51ClassicControlPreset)
                   replay_buffer,
                   exploration=LinearScheduler(
                       self.hyperparameters['initial_exploration'],
                       self.hyperparameters['final_exploration'],
                       0,
                       self.hyperparameters["final_exploration_step"] -
                       self.hyperparameters["replay_start_size"],
                       name="epsilon",
                       writer=writer,
                   ),
                   discount_factor=self.hyperparameters["discount_factor"],
                   minibatch_size=self.hyperparameters["minibatch_size"],
                   replay_start_size=self.hyperparameters["replay_start_size"],
                   update_frequency=self.hyperparameters["update_frequency"],
                   writer=writer)

    def test_agent(self):
        q_dist = QDist(
            copy.deepcopy(self.model),
            None,
            self.n_actions,
            self.hyperparameters['atoms'],
            v_min=self.hyperparameters['v_min'],
            v_max=self.hyperparameters['v_max'],
        )
        return C51TestAgent(q_dist, self.n_actions,
                            self.hyperparameters["test_exploration"])


c51 = PresetBuilder('c51', default_hyperparameters, C51ClassicControlPreset)