from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import Adam

# env is assumed to have been created earlier in the chapter (e.g., with gym.make()).
model = Sequential()
# SequentialMemory with window_length=1 feeds observations of shape
# (1,) + observation_shape, so the input is flattened before the dense layers.
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16, activation='relu'))
model.add(Dense(env.action_space.n, activation='linear'))

memory = SequentialMemory(limit=1000000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
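The EpsGreedyQPolicy used here performs epsilon-greedy action selection, with keras-rl's default epsilon of 0.1 when no value is passed. The snippet below is an illustrative sketch of that selection rule, not the library's internal code:

import numpy as np

def eps_greedy_action(q_values, eps=0.1):
    # With probability eps, explore with a uniformly random action;
    # otherwise exploit the action with the highest estimated Q-value.
    if np.random.uniform() < eps:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))

After fit returns, the trained agent can also be evaluated with keras-rl's test method, e.g. dqn.test(env, nb_episodes=5, visualize=True), using the dqn and env objects from the listing above.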
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import Adam

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(32, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(env.action_space.n, activation='linear'))

memory = SequentialMemory(limit=10000, window_length=1)
# Anneal epsilon linearly from 1.0 to 0.1 over the first 10,000 training steps;
# value_test is the fixed epsilon used when the agent is evaluated.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=.1, value_test=.05, nb_steps=10000)
dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=.00025), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

This example trains a DQN agent on the same environment as the previous example, but wraps EpsGreedyQPolicy in a LinearAnnealedPolicy. The wrapped policy anneals the epsilon value used in training, starting at 1.0 and decreasing linearly to 0.1 over the first 10,000 steps, after which it remains at 0.1; the separate value_test of 0.05 is the fixed epsilon applied when the agent is tested rather than trained. The DQN agent's neural network model has two hidden dense layers, each containing 32 neurons with the activation function "relu". Both examples use the Keras-RL library.
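The linear schedule itself is simple to reproduce. The function below is a minimal sketch of the annealing rule just described (the function name is illustrative, not part of Keras-RL):

def annealed_eps(step, value_max=1.0, value_min=0.1, nb_steps=10000):
    # Epsilon falls linearly from value_max to value_min over nb_steps
    # training steps, then stays at value_min for the rest of training.
    slope = -(value_max - value_min) / float(nb_steps)
    return max(value_min, slope * step + value_max)

annealed_eps(0)      # 1.0
annealed_eps(5000)   # 0.55
annealed_eps(20000)  # 0.1

Annealing in this way lets the agent explore heavily while its Q-value estimates are still poor, then shift toward exploiting the learned policy as training progresses.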