def main(_):
    """Record a MountainCar-v0 demo: random play first, then the saved DQN.

    Builds a DQN with one 64-unit hidden layer, loads weights from
    ``saved_models/``, turns rendering on, and records 5 episodes driven by
    a random Q-function followed by 10 episodes driven by the loaded DQN
    into ``mountaincar_demo.mp4``.

    Args:
        _: Unused positional argument (presumably the argv list handed over
            by the script launcher -- confirm against the caller).
    """
    batch_size = 64  # Training batch size handed to the DQN.
    env = environment_impl.GymEnvironment(gym.make('MountainCar-v0'))
    env.SetGymEnvMaxEpisodeSteps(400)
    qfunc = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(64, )),
        training_batch_size=batch_size,
        discount_factor=0.99,
    )
    qfunc.Load('saved_models/mountaincar_shape_64_rmsprop_gamma_099.weights')
    policy = policy_impl.GreedyPolicy()
    runner = runner_impl.NoOpRunner()

    env.TurnOnRendering(should_render=True, fps=24)
    logging.ENV.debug_verbosity = 9

    env.StartRecording(video_filename='mountaincar_demo.mp4')
    try:
        # First 5 runs with random actions:
        rand_qfunc = qfunc_impl.RandomQFunction(env.GetActionSpaceSize())
        runner.Run(env=env, brain=rand_qfunc, policy=policy, num_of_episodes=5)
        # Then 10 runs with trained qfunc:
        runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=10)
    finally:
        # Always stop the recording, even when an episode raises or the demo
        # is interrupted, so StartRecording is never left unpaired.
        env.StopRecording()
Esempio n. 2
0
def main(_):
    """Train a DQN on CartPole-v0, evaluate it, then render a demo.

    Three phases share one environment, one DQN and one experience-replay
    runner:

    1. 500 training episodes under an epsilon-greedy policy (epsilon=0.1).
    2. 100 test episodes under a purely greedy policy.
    3. A rendered demo: 5 episodes with a random Q-function, then 10 with
       the DQN.

    Args:
        _: Unused positional argument (presumably the argv list handed over
            by the script launcher -- confirm against the caller).
    """
    batch_size = 64  # Shared by the DQN and the replay runner.
    env = environment_impl.GymEnvironment(gym.make('CartPole-v0'))
    model = qfunc_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(20, 20, 20),
    )
    qfunc = qfunc_impl.DQN(
        model=model,
        training_batch_size=batch_size,
        discount_factor=0.99,
    )
    runner = runner_impl.ExperienceReplayRunner(
        experience_capacity=100000,
        experience_sample_batch_size=batch_size,
    )

    # Phase 1: train for 500 episodes with some random exploration.
    logging.ENV.debug_verbosity = 3
    exploring_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=0.1)
    runner.Run(
        env=env, brain=qfunc, policy=exploring_policy, num_of_episodes=500)

    # Phase 2: test for 100 episodes, always taking the greedy action.
    logging.ENV.debug_verbosity = 4
    greedy_policy = policy_impl.GreedyPolicy()
    runner.Run(env=env, brain=qfunc, policy=greedy_policy, num_of_episodes=100)

    # Phase 3: rendered demo.
    env.TurnOnRendering(should_render=True, fps=24)
    # env.StartRecording(video_filename='demo.mp4')  # uncomment to record video.
    # Baseline first: 5 episodes driven by a random Q-function.
    random_brain = qfunc_impl.RandomQFunction(env.GetActionSpaceSize())
    runner.Run(
        env=env, brain=random_brain, policy=greedy_policy, num_of_episodes=5)
    # Then 10 episodes driven by the trained DQN.
    runner.Run(env=env, brain=qfunc, policy=greedy_policy, num_of_episodes=10)
Esempio n. 3
0
    def test_saveLoad(self):
        """Saving a DQN and loading it into a fresh DQN yields equal weights.

        The save file lives in a private temporary directory instead of a
        fixed '/tmp' path, so the test runs on platforms without '/tmp',
        cannot collide with a concurrent run, and leaves nothing behind.
        """
        import os
        import tempfile

        self.qfunc._SetValues(self.states, self.values)
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_file = os.path.join(tmp_dir, 'DQNTest_savedata.tmp')
            self.qfunc.Save(tmp_file)
            # Fresh DQN to load into; its architecture must match the saved
            # one for the weights to be comparable.
            qfunc = qfunc_impl.DQN(model=qfunc_impl.CreateModel(
                state_shape=(3, ),
                action_space_size=2,
                hidden_layer_sizes=(6, 4),
            ))
            qfunc.Load(tmp_file)

            # Compare while the saved file still exists, in case loading is
            # not fully eager.
            numpy_util.TestUtil.AssertModelWeightsEqual(qfunc._model,
                                                        self.qfunc._model)
Esempio n. 4
0
    def _RunEnv(gym_env):
        """Run a small DQN in ``gym_env`` for 10 episodes of at most 10 steps.

        Wraps the gym environment, builds a DQN with a single 4-unit hidden
        layer sized from the environment's state shape and action space, and
        drives it with a ``SimpleRunner``.

        Args:
            gym_env: The gym environment to wrap in a ``GymEnvironment``.
        """
        env = environment_impl.GymEnvironment(gym_env)
        env.SetGymEnvMaxEpisodeSteps(10)

        model = qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(4, ),
        )
        dqn = qfunc_impl.DQN(model=model)

        env.Reset()
        # epsilon=1.0 presumably means every action is random -- confirm
        # against GreedyPolicyWithRandomness.
        random_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

        runner = runner_impl.SimpleRunner()
        runner.Run(
            env=env,
            qfunc=dqn,
            policy=random_policy,
            num_of_episodes=10,
        )
Esempio n. 5
0
    def setUp(self) -> None:
        """Create the DQN under test plus a two-row batch of states/values."""
        # The model takes states of shape (3,) and has 2 actions.
        model = qfunc_impl.CreateModel(
            state_shape=(3, ),
            action_space_size=2,
            hidden_layer_sizes=(6, 4),
        )
        self.qfunc = qfunc_impl.DQN(model=model)

        # Two states, one per row, each with 3 components.
        self.states = numpy.array([[1, 2, 3], [4, 5, 6]])
        # One row per state above, one column per action.
        self.values = numpy.array([[0.5, 0.5], [0.3, 0.7]])
Esempio n. 6
0
def main(_):
    """Render 10 Acrobot-v1 episodes driven by a DQN loaded from disk.

    Builds a DQN with three 20-unit hidden layers, loads the saved model
    from ``saved_models/``, and runs it under a greedy policy with rendering
    turned on at 10 fps.

    Args:
        _: Unused positional argument (presumably the argv list handed over
            by the script launcher -- confirm against the caller).
    """
    batch_size = 64  # Training batch size handed to the DQN.
    env = environment_impl.GymEnvironment(gym.make('Acrobot-v1'))

    model = qfunc_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(20, 20, 20),
    )
    qfunc = qfunc_impl.DQN(
        model=model,
        training_batch_size=batch_size,
        discount_factor=0.99,
    )
    saved_model_path = (
        'saved_models/acrobot_v1_shape_20-20-20_rmsprop_gamma_0.99.model')
    qfunc.LoadModel(saved_model_path)

    greedy_policy = policy_impl.GreedyPolicy()
    demo_runner = runner_impl.NoOpRunner()

    env.TurnOnRendering(should_render=True, fps=10)
    logging.ENV.debug_verbosity = 9
    demo_runner.Run(
        env=env,
        qfunc=qfunc,
        policy=greedy_policy,
        num_of_episodes=10,
    )