import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy, BoltzmannQPolicy
from rl.memory import SequentialMemory


class ReusePolicy(EpsGreedyQPolicy):
    """Epsilon-greedy policy that can also reuse a previously trained source agent."""

    def __init__(self, agent_weights=None, *args, **kwargs):
        super(ReusePolicy, self).__init__(*args, **kwargs)
        # Rebuild the source agent's network so its trained weights can be loaded.
        self.nb_actions = 3
        self.model = Sequential()
        self.model.add(Flatten(input_shape=(1,) + (4,)))
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        self.model.add(Dense(32))
        self.model.add(Activation('relu'))
        self.model.add(Dense(self.nb_actions))
        self.model.add(Activation('linear'))
        self.memory = SequentialMemory(limit=500, window_length=1)
        self.policy = BoltzmannQPolicy()
        self.dqn = DQNAgent(model=self.model, nb_actions=self.nb_actions,
                            memory=self.memory, nb_steps_warmup=5,
                            target_model_update=1e-2, policy=self.policy)
        self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])
        self.dqn.load_weights(agent_weights)

    def simplify_obs(self, obs):
        # Keep only the first four features so the observation matches the
        # input shape the source agent was trained on.
        simple_obs = [obs[0][0:4]]
        return simple_obs

    def select_action(self, q_values, state):
        assert q_values.ndim == 1
        nb_actions_current_env = q_values.shape[0]
        rand_number = np.random.uniform()
        if rand_number > self.eps:
            # Reuse: act greedily with respect to the source agent's Q-values.
            simple_obs = self.simplify_obs(state)
            reuse_q_values = self.dqn.compute_q_values(simple_obs)
            action = np.argmax(reuse_q_values)
        elif rand_number < (1 - self.eps) / 2:
            # Exploit the Q-values learned in the current environment.
            action = np.argmax(q_values)
        else:
            # Explore with a uniformly random action.
            # (np.random.random_integers is deprecated; randint's upper bound is exclusive.)
            action = np.random.randint(0, nb_actions_current_env)
        return action
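# A minimal usage sketch, not from the original code. Note that select_action
# above takes both q_values and state, while the stock keras-rl DQNAgent calls
# its policy with q_values only, so a DQNAgent subclass that also forwards the
# current observation is assumed here; 'dqn_source_weights.h5f', target_model,
# target_env, and StateAwareDQNAgent are all hypothetical placeholders.
reuse_policy = ReusePolicy(agent_weights='dqn_source_weights.h5f', eps=0.1)
agent = StateAwareDQNAgent(model=target_model,
                           nb_actions=target_model.output_shape[-1],
                           memory=SequentialMemory(limit=50000, window_length=1),
                           nb_steps_warmup=100, target_model_update=1e-2,
                           policy=reuse_policy)
agent.compile(Adam(lr=1e-3), metrics=['mae'])
agent.fit(target_env, nb_steps=50000, visualize=False, verbose=2)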
model.summary()

# Write out the model architecture as JSON.
model_json_str = model.to_json()
with open('dqn_{}_model.json'.format(ENV_NAME), 'w') as f:
    f.write(model_json_str)

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
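# A follow-up sketch, not in the original script: the JSON architecture and the
# weight file saved above can be reloaded later to evaluate without retraining.
# This assumes the same ENV_NAME, env, nb_actions, memory, and policy are in scope.
from keras.models import model_from_json

with open('dqn_{}_model.json'.format(ENV_NAME)) as f:
    restored_model = model_from_json(f.read())

restored_dqn = DQNAgent(model=restored_model, nb_actions=nb_actions, memory=memory,
                        nb_steps_warmup=100, target_model_update=1e-2, policy=policy)
restored_dqn.compile(Adam(lr=1e-3), metrics=['mae'])
restored_dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
restored_dqn.test(env, nb_episodes=5, visualize=True)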