def __init__(self):
    # Grid search over DQNPong hyper-parameters:
    # two learning rates x two epsilon-greedy exploration schedules.
    super(GridSearchPong, self).__init__(
        DQNPong,
        {
            "learning_rate": [1e-3, 1e-5],
            "greedy_epsilon": [
                utils.CappedLinear(2e5, 0.1, 0.01),
                utils.CappedLinear(5e5, 0.3, 0.01),
            ],
        })
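# The grid-search base class is not shown here. As a rough sketch of what it
# presumably does with the dict above -- instantiate one DQNPong per
# combination of the listed values -- the expand_grid helper below is
# hypothetical and for illustration only.
import itertools

def expand_grid(param_lists):
    """Yield one kwargs dict per combination of the supplied value lists."""
    keys = sorted(param_lists.keys())
    for values in itertools.product(*(param_lists[k] for k in keys)):
        yield dict(zip(keys, values))

# 2 learning rates x 2 epsilon schedules -> 4 configurations, each of which
# could be splatted into DQNPong(**kwargs).
for kwargs in expand_grid({"learning_rate": [1e-3, 1e-5],
                           "greedy_epsilon": ["schedule_a", "schedule_b"]}):
    print(kwargs)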
def __init__(self): env = gym.make("BreakoutNoFrameskip-v4") env = full_wrap_dqn(env) env = RewardLongerEnv(env) f = f_dqn_atari(env.action_space.n) super(DQNBreakout, self).__init__( env, f, episode_n=10000, greedy_epsilon=utils.CappedLinear(2e5, 0.1, 0.01), batch_size=64, network_optimizer_ctor=lambda: hrl.network.LocalOptimizer( tf.train.AdamOptimizer(1e-4), grad_clip=10.0))
def __init__(self,
             greedy_epsilon=utils.CappedLinear(5e5, 0.1, 0.01),
             learning_rate=1e-3):
    # Pong experiment with a tunable exploration schedule and learning rate
    # (the two hyper-parameters swept by GridSearchPong above).
    env = gym.make("PongNoFrameskip-v4")
    env = full_wrap_dqn(env)
    f = f_dqn_atari(env.action_space.n)
    network_optimizer_ctor = lambda: hrl.network.LocalOptimizer(
        tf.train.AdamOptimizer(learning_rate), grad_clip=10.0)
    super(DQNPong, self).__init__(
        env, f,
        greedy_epsilon=greedy_epsilon,
        network_optimizer_ctor=network_optimizer_ctor)
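# utils.CappedLinear(n, start, end) is used throughout as the exploration
# schedule. A plausible reading -- assumed here purely for illustration -- is a
# linear interpolation from start to end over n steps, held at end afterwards.
def capped_linear(step, n, start, end):
    """Illustrative epsilon schedule with the assumed CappedLinear semantics."""
    frac = min(float(step) / float(n), 1.0)
    return start + frac * (end - start)

# With the Pong default CappedLinear(5e5, 0.1, 0.01):
print(capped_linear(0, 5e5, 0.1, 0.01))        # 0.1
print(capped_linear(250000, 5e5, 0.1, 0.01))   # 0.055
print(capped_linear(1000000, 5e5, 0.1, 0.01))  # 0.01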
def __init__(self, env, f_create_q,
             episode_n=1000,
             discount_factor=0.99,
             ddqn=False,
             target_sync_interval=100,
             target_sync_rate=1.0,
             update_interval=4,
             replay_size=5000,
             batch_size=32,
             greedy_epsilon=utils.CappedLinear(5e5, 0.1, 0.01),
             network_optimizer_ctor=lambda: hrl.network.LocalOptimizer(
                 tf.train.AdamOptimizer(1e-3), grad_clip=10.0)):
    super(DQNAtari, self).__init__(
        env, f_create_q, episode_n, discount_factor, ddqn,
        target_sync_interval, target_sync_rate, update_interval,
        replay_size, batch_size, greedy_epsilon, network_optimizer_ctor)
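# The ddqn flag and target-sync parameters are forwarded to the generic DQN
# learner. For reference, a numpy sketch of the bootstrap target that such a
# flag conventionally switches between (standard vs. double DQN); this is an
# illustration, not the library's implementation.
import numpy as np

def bootstrap_target(reward, done, q_next_online, q_next_target,
                     discount_factor=0.99, ddqn=False):
    """One-step TD target for a batch; q_next_* have shape [batch, n_actions]."""
    if ddqn:
        # Double DQN: online network selects the action, target network evaluates it.
        a_star = np.argmax(q_next_online, axis=1)
        next_value = q_next_target[np.arange(len(a_star)), a_star]
    else:
        # Vanilla DQN: target network both selects and evaluates.
        next_value = np.max(q_next_target, axis=1)
    return reward + discount_factor * (1.0 - done) * next_value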
def __init__(self, env=None, f_create_q=None,
             episode_n=10000,
             discount_factor=0.99,
             ddqn=False,
             target_sync_interval=100,
             target_sync_rate=1.0,
             update_interval=4,
             replay_size=1000,
             batch_size=8,
             lower_weight=1.0,
             upper_weight=1.0,
             neighbour_size=8,
             greedy_epsilon=utils.CappedLinear(2e5, 0.1, 0.01),
             learning_rate=1e-4,
             sampler_creator=None,
             **kwargs):
    if env is None:
        env = gym.make("BreakoutNoFrameskip-v4")
        env = full_wrap_dqn(env)
        env = RewardLongerEnv(env)
        f_create_q = f_dqn_atari(env.action_space.n)
    if sampler_creator is None:
        # Default sampler: fixed-length trajectory segments drawn from a
        # prioritized replay buffer. (Guarding on None keeps a caller-supplied
        # sampler_creator from being silently overwritten.)
        def f(args):
            return sampling.TruncateTrajectorySampler(
                replay_memory=playback.NearPrioritizedPlayback(5000),
                batch_size=batch_size,
                trajectory_length=8,
                interval=update_interval)
        sampler_creator = f
    super(APEROTDQNBreakout, self).__init__(
        env, f_create_q, episode_n, discount_factor, ddqn,
        target_sync_interval, target_sync_rate, update_interval,
        replay_size, batch_size, lower_weight, upper_weight, neighbour_size,
        greedy_epsilon, learning_rate, sampler_creator, **kwargs)
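# TruncateTrajectorySampler and NearPrioritizedPlayback come from the project's
# sampling/playback modules. The uniform sampler below only illustrates the
# general idea of drawing fixed-length trajectory segments for training; the
# real sampler additionally uses replay priorities.
import random

def sample_trajectory_segments(episodes, batch_size=8, trajectory_length=8):
    """Draw `batch_size` consecutive segments of `trajectory_length` transitions."""
    eligible = [ep for ep in episodes if len(ep) >= trajectory_length]
    batch = []
    for _ in range(batch_size):
        ep = random.choice(eligible)
        start = random.randint(0, len(ep) - trajectory_length)
        batch.append(ep[start:start + trajectory_length])
    return batch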