def define_agent(self, width, height, num_actions):
    """Build an n-step DQN agent using a CNN over a layered grid encoding.

    The encoder adds a treasure-position layer; learning uses a shared
    Adam optimizer with 16-step returns.
    """
    # Epsilon decays linearly from 1.0 to 0.01 over 25000 steps.
    exploration = EpsilonGreedyPolicy(1, 0.01, 25000)
    agent_config = Config(
        num_actions=num_actions,
        encoder=LayerEncoder(width, height, treasure_position=True),
        optimizer=SharedAdamOptimizer(0.001),
        network=CNN(hidden_units=[128]),
        policy=exploration,
        discount=0.95,
        n_step=16)
    return NStepDQNAgent(config=agent_config)
def define_agent(self, width, height, num_actions):
    """Build an n-step DQN agent with an MLP over a one-hot state encoding.

    Uses 8-step returns and a plain Adam optimizer.
    """
    # Epsilon decays linearly from 1.0 to 0.01 over 1000 steps.
    exploration = EpsilonGreedyPolicy(1, 0.01, 1000)
    agent_config = Config(
        num_actions=num_actions,
        encoder=OneHotEncoder(width, height),
        optimizer=AdamOptimizer(0.01),
        network=MLP(),
        policy=exploration,
        discount=0.95,
        n_step=8)
    return NStepDQNAgent(config=agent_config)
def define_agent(self, width, height, num_actions):
    """Build a replay-buffer DQN agent with an MLP over a one-hot encoding.

    Experience replay holds 100 transitions sampled in batches of 16.
    """
    # Epsilon decays linearly from 1.0 to 0.01 over 500 steps.
    exploration = EpsilonGreedyPolicy(1, 0.01, 500)
    agent_config = Config(
        num_actions=num_actions,
        encoder=OneHotEncoder(width, height),
        optimizer=AdamOptimizer(0.01),
        network=MLP(),
        policy=exploration,
        discount=0.95,
        capacity=100,
        batch_size=16)
    return DQNAgent(config=agent_config)
def define_agent(self, width, height, num_actions):
    """Build a double-DQN agent with a CNN over a layered grid encoding.

    The encoder adds a treasure-position layer; a target network is
    synchronized every 100 updates and double Q-learning is enabled.
    """
    # Epsilon decays linearly from 1.0 to 0.01 over 50000 steps.
    exploration = EpsilonGreedyPolicy(1, 0.01, 50000)
    agent_config = Config(
        num_actions=num_actions,
        encoder=LayerEncoder(width, height, treasure_position=True),
        optimizer=AdamOptimizer(0.001),
        network=CNN(hidden_units=[128]),
        policy=exploration,
        discount=0.95,
        capacity=10000,
        batch_size=8,
        target_sync=100,
        double_q=True)
    return DQNAgent(config=agent_config)