예제 #1
0
 def define_agent(self, width, height, num_actions):
     """Create an n-step DQN agent: CNN over a layered (treasure-aware) grid encoding.

     :param width: grid width passed to the observation encoder
     :param height: grid height passed to the observation encoder
     :param num_actions: size of the discrete action space
     :return: configured NStepDQNAgent instance
     """
     agent_config = Config(
         num_actions=num_actions,
         encoder=LayerEncoder(width, height, treasure_position=True),
         optimizer=SharedAdamOptimizer(0.001),
         network=CNN(hidden_units=[128]),
         # epsilon anneals 1 -> 0.01 over 25000 steps
         policy=EpsilonGreedyPolicy(1, 0.01, 25000),
         discount=0.95,
         n_step=16)
     return NStepDQNAgent(config=agent_config)
예제 #2
0
 def define_agent(self, width, height, num_actions):
     """Create an n-step DQN agent: MLP over a one-hot grid encoding.

     :param width: grid width passed to the observation encoder
     :param height: grid height passed to the observation encoder
     :param num_actions: size of the discrete action space
     :return: configured NStepDQNAgent instance
     """
     agent_config = Config(
         num_actions=num_actions,
         encoder=OneHotEncoder(width, height),
         optimizer=AdamOptimizer(0.01),
         network=MLP(),
         # epsilon anneals 1 -> 0.01 over 1000 steps
         policy=EpsilonGreedyPolicy(1, 0.01, 1000),
         discount=0.95,
         n_step=8)
     return NStepDQNAgent(config=agent_config)
예제 #3
0
 def define_agent(self, width, height, num_actions):
     """Create a replay-buffer DQN agent: MLP over a one-hot grid encoding.

     :param width: grid width passed to the observation encoder
     :param height: grid height passed to the observation encoder
     :param num_actions: size of the discrete action space
     :return: configured DQNAgent instance
     """
     agent_config = Config(
         num_actions=num_actions,
         encoder=OneHotEncoder(width, height),
         optimizer=AdamOptimizer(0.01),
         network=MLP(),
         # epsilon anneals 1 -> 0.01 over 500 steps
         policy=EpsilonGreedyPolicy(1, 0.01, 500),
         discount=0.95,
         # small experience replay: 100 transitions, minibatches of 16
         capacity=100,
         batch_size=16)
     return DQNAgent(config=agent_config)
예제 #4
0
 def define_agent(self, width, height, num_actions):
     """Create a double DQN agent with a target network: CNN over a layered encoding.

     :param width: grid width passed to the observation encoder
     :param height: grid height passed to the observation encoder
     :param num_actions: size of the discrete action space
     :return: configured DQNAgent instance
     """
     agent_config = Config(
         num_actions=num_actions,
         encoder=LayerEncoder(width, height, treasure_position=True),
         optimizer=AdamOptimizer(0.001),
         network=CNN(hidden_units=[128]),
         # epsilon anneals 1 -> 0.01 over 50000 steps
         policy=EpsilonGreedyPolicy(1, 0.01, 50000),
         discount=0.95,
         # replay buffer of 10000 transitions, minibatches of 8
         capacity=10000,
         batch_size=8,
         # sync target network every 100 steps; use double Q-learning targets
         target_sync=100,
         double_q=True)
     return DQNAgent(config=agent_config)