Example #1
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy, BoltzmannQPolicy
from rl.memory import SequentialMemory


class ReusePolicy(EpsGreedyQPolicy):
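    """Epsilon-greedy-style policy that can fall back on a pretrained agent.

    With probability (1 - eps) the action is chosen by a smaller, previously
    trained DQN agent (4 observations, 3 actions) whose weights are loaded
    from `agent_weights`; otherwise the policy either acts greedily on the
    current agent's Q-values or picks a uniformly random action.
    """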
    def __init__(self, agent_weights=None, *args, **kwargs):
        super(ReusePolicy, self).__init__(*args, **kwargs)
        self.nb_actions = 3
        self.model = Sequential()
        self.model.add(Flatten(input_shape=(1,) + (4,)))
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        self.model.add(Dense(32))
        self.model.add(Activation('relu'))
        self.model.add(Dense(self.nb_actions))
        self.model.add(Activation('linear'))
        self.memory = SequentialMemory(limit=500, window_length=1)
        self.policy = BoltzmannQPolicy()
        self.dqn = DQNAgent(model=self.model, nb_actions=self.nb_actions, memory=self.memory, nb_steps_warmup=5,
                       target_model_update=1e-2, policy=self.policy)
        self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])
        self.dqn.load_weights(agent_weights)

    def simplify_obs(self, obs):
        # Keep only the first four observation components so they match the
        # pretrained agent's (1, 4) input window.
        simple_obs = [obs[0][0:4]]
        return simple_obs

    def select_action(self, q_values, state):
        assert q_values.ndim == 1
        nb_actions_current_env = q_values.shape[0]
        rand_number = np.random.uniform()
        if rand_number > self.eps:
            # With probability (1 - eps), reuse the pretrained agent: query its
            # Q-values on the simplified observation and act greedily on them.
            simple_obs = self.simplify_obs(state)
            reuse_q_values = self.dqn.compute_q_values(simple_obs)
            action = np.argmax(reuse_q_values)
        elif rand_number < (1 - self.eps) / 2:
            # Act greedily on the current agent's own Q-values.
            action = np.argmax(q_values)
        else:
            # Explore with a uniformly random action. np.random.random_integers
            # is deprecated; randint's upper bound is exclusive, so this samples
            # from 0 .. nb_actions_current_env - 1.
            action = np.random.randint(0, nb_actions_current_env)
        return action
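
# Note: the stock keras-rl DQNAgent calls policy.select_action(q_values=...) without
# the current state, so plugging ReusePolicy into training requires an agent whose
# forward pass also hands over the state. Stand-alone, the policy can be exercised
# directly; a minimal sketch (the weight file, Q-values and observation below are
# made-up illustration values, not part of the original code):
#
#   reuse_policy = ReusePolicy(agent_weights='dqn_small_env_weights.h5f', eps=0.1)
#   q_values = np.array([0.1, 0.5, 0.2, 0.7, 0.3])          # current agent's Q-values
#   state = [np.array([0.02, -0.1, 0.3, 0.0, 0.05, 0.01])]  # current observation window
#   action = reuse_policy.select_action(q_values, state)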

Example #2
print(model.summary())
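
# This fragment starts mid-script: it assumes the imports plus `ENV_NAME`, `env`,
# `nb_actions`, and `model` were defined earlier. A minimal sketch of such a
# preamble (the environment name and layer sizes here are assumptions, in the
# spirit of keras-rl's CartPole example):
#
#   import gym
#   from keras.models import Sequential
#   from keras.layers import Dense, Activation, Flatten
#   from keras.optimizers import Adam
#   from rl.agents.dqn import DQNAgent
#   from rl.policy import BoltzmannQPolicy
#   from rl.memory import SequentialMemory
#
#   ENV_NAME = 'CartPole-v0'                 # assumed environment name
#   env = gym.make(ENV_NAME)
#   nb_actions = env.action_space.n
#
#   model = Sequential()
#   model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
#   model.add(Dense(16))
#   model.add(Activation('relu'))
#   model.add(Dense(nb_actions))
#   model.add(Activation('linear'))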

# Export the model architecture to JSON
model_json_str = model.to_json()
with open('dqn_{}_model.json'.format(ENV_NAME), 'w') as json_file:
    json_file.write(model_json_str)
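
# The exported architecture can later be rebuilt from that JSON file; a minimal
# sketch (it assumes the file written just above and the same ENV_NAME):
from keras.models import model_from_json

with open('dqn_{}_model.json'.format(ENV_NAME)) as json_file:
    restored_model = model_from_json(json_file.read())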

# Finally, we configure and compile our agent. You can use any built-in Keras
# optimizer and any of its metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=100,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
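
# The same compile step works with any other built-in Keras optimizer and metric,
# for example (an alternative sketch, not part of the original run):
#   from keras.optimizers import RMSprop
#   dqn.compile(RMSprop(lr=1e-3), metrics=['mae', 'mse'])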

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)
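
# A faster, non-visual alternative logs training metrics with keras-rl's FileLogger
# callback instead of rendering (a sketch; the log file name is an assumption):
#   from rl.callbacks import FileLogger
#   dqn.fit(env, nb_steps=50000, visualize=False, verbose=2,
#           callbacks=[FileLogger('dqn_{}_log.json'.format(ENV_NAME), interval=100)])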

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
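
# The saved weights can later be restored into an agent built the same way, e.g.
# for evaluation without retraining (a minimal sketch; here it simply reloads the
# file written just above):
dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))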

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)