Example 1
# Constructor of GridSearchPong: sweeps DQNPong over two learning rates and
# two epsilon-greedy annealing schedules (a 2x2 hyperparameter grid).
def __init__(self):
    super(GridSearchPong, self).__init__(
        DQNPong, {
            "learning_rate": [1e-3, 1e-5],
            "greedy_epsilon": [
                utils.CappedLinear(2e5, 0.1, 0.01),
                utils.CappedLinear(5e5, 0.3, 0.01)
            ]
        })
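The grid-search base class presumably expands this parameter dictionary into the Cartesian product of its value lists and launches one DQNPong run per combination. Below is a minimal, standalone sketch of that expansion using itertools.product; it is an illustration of the idea, not the hrl API, and the placeholder epsilon labels are hypothetical.

from itertools import product

def expand_grid(param_grid):
    """Yield one {name: value} dict per combination of the listed values."""
    names = sorted(param_grid)
    for values in product(*(param_grid[n] for n in names)):
        yield dict(zip(names, values))

# The 2x2 grid above yields four hyperparameter combinations:
for combo in expand_grid({"learning_rate": [1e-3, 1e-5],
                          "greedy_epsilon": ["eps_2e5", "eps_5e5"]}):
    print(combo)  # e.g. {'greedy_epsilon': 'eps_2e5', 'learning_rate': 0.001}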
Example 2
# Constructor of DQNBreakout: builds a fully wrapped Atari Breakout environment,
# a convolutional Q-network, and trains DQN with Adam (lr 1e-4, gradient clip 10).
def __init__(self):
    env = gym.make("BreakoutNoFrameskip-v4")
    env = full_wrap_dqn(env)      # standard DQN preprocessing wrappers
    env = RewardLongerEnv(env)    # extra wrapper, presumably rewarding longer episodes
    f = f_dqn_atari(env.action_space.n)
    super(DQNBreakout, self).__init__(
        env,
        f,
        episode_n=10000,
        greedy_epsilon=utils.CappedLinear(2e5, 0.1, 0.01),
        batch_size=64,
        network_optimizer_ctor=lambda: hrl.network.LocalOptimizer(
            tf.train.AdamOptimizer(1e-4), grad_clip=10.0))
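The grad_clip=10.0 argument suggests that hrl.network.LocalOptimizer clips gradients before applying them. The following TensorFlow 1.x sketch shows one common way to realize that pattern (clipping by global norm is my assumption; the library may clip differently), matching the tf.train.AdamOptimizer usage above:

import tensorflow as tf  # TensorFlow 1.x API, as in the snippet above

def clipped_train_op(loss, var_list, learning_rate=1e-4, grad_clip=10.0):
    """Compute gradients, clip their global norm, then apply with Adam."""
    optimizer = tf.train.AdamOptimizer(learning_rate)
    grads = tf.gradients(loss, var_list)
    clipped, _ = tf.clip_by_global_norm(grads, grad_clip)
    return optimizer.apply_gradients(zip(clipped, var_list))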
Example 3
# Constructor of DQNPong: like DQNBreakout, but exposes the epsilon schedule and
# the learning rate as constructor arguments so they can be grid-searched (Example 1).
def __init__(self,
             greedy_epsilon=utils.CappedLinear(5e5, 0.1, 0.01),
             learning_rate=1e-3):
    env = gym.make("PongNoFrameskip-v4")
    env = full_wrap_dqn(env)
    f = f_dqn_atari(env.action_space.n)
    network_optimizer_ctor = lambda: hrl.network.LocalOptimizer(
        tf.train.AdamOptimizer(learning_rate), grad_clip=10.0)
    super(DQNPong, self).__init__(
        env,
        f,
        greedy_epsilon=greedy_epsilon,
        network_optimizer_ctor=network_optimizer_ctor)
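utils.CappedLinear(5e5, 0.1, 0.01) appears to describe an exploration rate that decays linearly from 0.1 to 0.01 over the first 5e5 steps and then stays capped at 0.01. Here is a small standalone sketch of such a schedule; it reflects my reading of the constructor arguments, not hrl's actual class:

class CappedLinearSchedule(object):
    """Linear interpolation from `start` to `end` over `steps`, constant afterwards."""

    def __init__(self, steps, start, end):
        self.steps, self.start, self.end = float(steps), start, end

    def value(self, t):
        frac = min(max(t / self.steps, 0.0), 1.0)
        return self.start + frac * (self.end - self.start)

schedule = CappedLinearSchedule(5e5, 0.1, 0.01)
print(schedule.value(0), schedule.value(2.5e5), schedule.value(1e6))  # 0.1 0.055 0.01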
Example 4
# Constructor of DQNAtari: a reusable DQN experiment for Atari that exposes the usual
# hyperparameters (replay buffer, target-network sync, epsilon annealing) as keyword
# arguments with defaults, and forwards them all to the base experiment class.
def __init__(self,
             env,
             f_create_q,
             episode_n=1000,
             discount_factor=0.99,
             ddqn=False,                 # enable Double DQN target computation
             target_sync_interval=100,   # steps between target-network syncs
             target_sync_rate=1.0,       # 1.0 presumably means a hard copy
             update_interval=4,
             replay_size=5000,
             batch_size=32,
             greedy_epsilon=utils.CappedLinear(5e5, 0.1, 0.01),
             network_optimizer_ctor=lambda: hrl.network.LocalOptimizer(
                 tf.train.AdamOptimizer(1e-3), grad_clip=10.0)):
    super(DQNAtari, self).__init__(
        env, f_create_q, episode_n, discount_factor, ddqn,
        target_sync_interval, target_sync_rate,
        update_interval, replay_size, batch_size,
        greedy_epsilon, network_optimizer_ctor)
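target_sync_interval and target_sync_rate presumably control how the target Q-network tracks the online network: every `interval` updates the target weights move toward the online weights, with rate 1.0 being a full (hard) copy. The numpy sketch below illustrates that convention; it is my reading of the parameters, not the library's code:

import numpy as np

def sync_target(target_weights, online_weights, rate=1.0):
    """Soft update: target <- (1 - rate) * target + rate * online (rate=1.0 is a hard copy)."""
    return [(1.0 - rate) * t + rate * o
            for t, o in zip(target_weights, online_weights)]

target = [np.zeros(3)]
online = [np.ones(3)]
target = sync_target(target, online, rate=1.0)              # hard copy -> [1. 1. 1.]
target = sync_target(target, [np.full(3, 2.0)], rate=0.1)   # soft step -> [1.1 1.1 1.1]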
Example 5
# Constructor of APEROTDQNBreakout: a Breakout experiment for an OT-DQN variant
# (note the lower_weight / upper_weight / neighbour_size arguments) that samples
# truncated trajectories from a prioritized replay memory. Defaults are filled in
# only when the corresponding argument is not supplied.
def __init__(self,
             env=None,
             f_create_q=None,
             episode_n=10000,
             discount_factor=0.99,
             ddqn=False,
             target_sync_interval=100,
             target_sync_rate=1.0,
             update_interval=4,
             replay_size=1000,
             batch_size=8,
             lower_weight=1.0,
             upper_weight=1.0,
             neighbour_size=8,
             greedy_epsilon=utils.CappedLinear(2e5, 0.1, 0.01),
             learning_rate=1e-4,
             sampler_creator=None,
             **kwargs):
    if env is None:
        env = gym.make("BreakoutNoFrameskip-v4")
        env = full_wrap_dqn(env)
        env = RewardLongerEnv(env)
    if f_create_q is None:
        f_create_q = f_dqn_atari(env.action_space.n)

    if sampler_creator is None:
        # Default sampler: draw length-8 trajectory slices from a
        # near-prioritized replay memory of 5000 transitions.
        def sampler_creator(args):
            return sampling.TruncateTrajectorySampler(
                replay_memory=playback.NearPrioritizedPlayback(5000),
                batch_size=batch_size,
                trajectory_length=8,
                interval=update_interval)

    super(APEROTDQNBreakout, self).__init__(
        env, f_create_q, episode_n, discount_factor, ddqn,
        target_sync_interval, target_sync_rate,
        update_interval, replay_size, batch_size,
        lower_weight, upper_weight, neighbour_size,
        greedy_epsilon, learning_rate, sampler_creator,
        **kwargs)
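TruncateTrajectorySampler with trajectory_length=8 suggests that training batches are built from fixed-length contiguous slices of stored episodes rather than from independent transitions. The toy sketch below shows that slicing idea with plain uniform sampling (no prioritization); it is illustrative only and does not reproduce hrl's sampler:

import random

def sample_trajectory_slices(episodes, batch_size=8, trajectory_length=8):
    """Return `batch_size` contiguous slices of `trajectory_length` transitions."""
    candidates = [ep for ep in episodes if len(ep) >= trajectory_length]
    batch = []
    for _ in range(batch_size):
        ep = random.choice(candidates)
        start = random.randrange(len(ep) - trajectory_length + 1)
        batch.append(ep[start:start + trajectory_length])
    return batch

episodes = [list(range(20)), list(range(12))]  # toy "episodes" of transition indices
print(sample_trajectory_slices(episodes, batch_size=2, trajectory_length=8))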