Example #1
            # Train online network
            online_network.fit(replay_state,
                               target,
                               epochs=step,
                               verbose=1,
                               initial_epoch=step - 1,
                               callbacks=[csv_logger, tensorboard])

            # Periodically copy online network weights to target network
            if step % copy_steps == 0:
                target_network.set_weights(online_network.get_weights())
            # And save weights
            if step % save_steps == 0:
                online_network.save_weights(
                    os.path.join(weights_folder,
                                 'weights_{}.h5f'.format(step)))
                gc.collect()  # also clean the garbage

### BASELINE STRATEGIES FOR COMPARISON
from mini_pacman import test, random_strategy, naive_strategy

random_med = test(strategy=random_strategy,
                  log_file='test_pacman_log_random.json')
naive_med = test(strategy=naive_strategy,
                 log_file='test_pacman_log_naive.json')
# custom_strategy is defined elsewhere in the script (a sketch follows below)
custom_med = test(strategy=custom_strategy,
                  log_file='test_pacman_log_custom.json')

print(
    f'Random Median = {random_med} Naive Median = {naive_med} Custom Median = {custom_med}'
)
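
# custom_strategy is not provided by mini_pacman and is not defined in this
# snippet; it is assumed to be a user-written policy created earlier in the
# script. A minimal sketch, assuming the trained online_network and the
# get_state / epsilon_greedy helpers used in Example #3, could look like this:
def custom_strategy(obs):
    q_values = online_network.predict(np.array([get_state(obs)]))
    action = epsilon_greedy(q_values, 0.05, obs['possible_actions'])
    return action + 1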
Example #2
import json
from mini_pacman import PacmanGame
from mini_pacman import test, random_strategy, naive_strategy

with open('test_params.json', 'r') as file:
    read_params = json.load(file)
game_params = read_params['params']
env = PacmanGame(**game_params)

test(strategy=naive_strategy, log_file='test_pacman_log_naive.json')
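
# A quick manual sanity check of the environment created above: play a few
# random moves using only the env/obs API that appears in the other examples
# (reset, render, make_action, obs['possible_actions'], obs['end_game']).
# This assumes obs['possible_actions'] is a list of legal actions.
import random

obs = env.reset()
env.render()
while not obs['end_game']:
    action = random.choice(obs['possible_actions'])
    obs = env.make_action(action)
    env.render()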
Example #3
        # Sample a minibatch of (obs, action, reward, next_obs, done) transitions
        minibatch = random.sample(replay_memory, batch_size)
        replay_state = np.array([get_state(x[0]) for x in minibatch])
        replay_action = np.array([x[1] for x in minibatch])
        replay_rewards = np.array([x[2] for x in minibatch])
        replay_next_state = np.array([get_state(x[3]) for x in minibatch])
        replay_done = np.array([x[4] for x in minibatch], dtype=int)
        # Bellman targets: r + gamma * max_a' Q_target(s', a') for non-terminal transitions
        target_predict = target_network.predict(replay_next_state)
        target_for_action = replay_rewards + (1 - replay_done) * gamma * \
                            np.amax(target_predict, axis=1)
        # Only the Q-value of the action actually taken is overwritten with its target
        target = online_network.predict(replay_state)
        target[np.arange(batch_size), replay_action] = target_for_action
        online_network.fit(replay_state,
                           target,
                           epochs=step,
                           verbose=1,
                           initial_epoch=step - 1)
        if step % copy_steps == 0:
            target_network.set_weights(online_network.get_weights())


def test_dqn_strategy(obs):
    q_values = online_network.predict(np.array([get_state(obs)]))
    action = epsilon_greedy(q_values, 0.05, obs['possible_actions'])
    return action + 1


from mini_pacman import test
test(strategy=test_dqn_strategy, log_file='test_pacman_log.json')
# save the deep neural network that estimates the Q-values
online_network.save('saved_dqn_mini_pacman_model.h5')

from keras.models import load_model
dqn_model = load_model('saved_dqn_mini_pacman_model.h5')


def dqn_strategy(obs):
    q_values = dqn_model.predict(np.array([get_state(obs)]))[0]
    action = epsilon_greedy(q_values, eps_min, nb_actions)
    return action + 1


# Some sub-optimal strategies are available for comparison: random_strategy moves the agent by selecting actions at random, while naive_strategy uses some basic heuristics.
from mini_pacman import test, random_strategy, naive_strategy
test(strategy=random_strategy, log_file='test_pacman_log.json')
test(strategy=naive_strategy, log_file='test_pacman_log.json')
test(strategy=dqn_strategy, log_file='test_pacman_log.json')

# watch a game being played with the DQN strategy (assumes an env created with PacmanGame, as in Example #2)
import time
obs = env.reset()
env.render()
state = get_state(obs)
while not obs['end_game']:
    time.sleep(0.1)
    # select the next action with the trained DQN (dqn_strategy keeps a small eps_min exploration chance)
    action = dqn_strategy(obs)
    obs = env.make_action(action)
    state = get_state(obs)
    env.render()
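
# get_state and epsilon_greedy are used by the snippets above but defined
# elsewhere in the original script. The sketches below are only assumptions
# about their shape, not the original implementations: get_state is taken to
# flatten the observation dict into a fixed-length vector, and epsilon_greedy
# to pick a random allowed action with probability eps and otherwise the
# action with the highest Q-value. The observation keys used in get_state
# ('player', 'monsters', 'diamonds') are hypothetical.
import random
import numpy as np


def get_state(obs):
    # Concatenate agent, monster and diamond coordinates into one flat vector.
    features = list(obs['player'])
    for monster in obs['monsters']:
        features += list(monster)
    for diamond in obs['diamonds']:
        features += list(diamond)
    return np.array(features)


def epsilon_greedy(q_values, eps, possible_actions):
    # possible_actions may be a list of 1-based allowed actions or an int
    # giving the number of actions (both call forms appear above). Returns a
    # 0-based index, which the callers shift back with "action + 1".
    if isinstance(possible_actions, int):
        possible_actions = list(range(1, possible_actions + 1))
    if random.random() < eps:
        return random.choice(possible_actions) - 1
    q_values = np.asarray(q_values).ravel()
    return max((a - 1 for a in possible_actions), key=lambda a: q_values[a])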
Example #5
    # json, os, PacmanGame, QLearn and config are assumed to be imported or defined earlier in the script
    from mini_pacman import test, random_strategy, naive_strategy

    with open('test_params.json', 'r') as file:
        read_params = json.load(file)
    game_params = read_params['params']
    env = PacmanGame(**game_params)

    DQN = QLearn(env)

    train_params = config.training_params_local  # change to rcc for real training

    DQN.train(name=train_params['name'],
              n_steps=train_params['n_steps'],
              warmup=train_params['warmup'],
              training_interval=train_params['training_interval'],
              copy_steps=train_params['copy_steps'],
              gamma=train_params['gamma'],
              skip_start=train_params['skip_start'],
              batch_size=train_params['batch_size'],
              double_dqn=train_params['double_dqn'],
              eps_max=train_params['eps_max'],
              eps_min=train_params['eps_min'],
              learning_rate=train_params['learning_rate'])

    weights_folder = os.path.join(train_params['name'], 'weights')
    DQN.online_network.load_weights(
        os.path.join(weights_folder, 'weights_last.h5f'))

    test(strategy=DQN.dqn_strategy,
         log_file='test_pacman_log_DQN_local_2.json')
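
# config.training_params_local is a project-specific settings object that is
# not shown here. Purely illustrative placeholder values for the keys consumed
# by DQN.train above (the meanings in the comments are the usual DQN readings
# of these names, not taken from the project) might look like:
training_params_local = {
    'name': 'dqn_local_run',      # run folder used for weights and logs
    'n_steps': 10000,             # total environment steps to train for
    'warmup': 1000,               # steps collected before learning starts
    'training_interval': 4,       # train once every k-th step
    'copy_steps': 100,            # sync target network this often
    'gamma': 0.9,                 # discount factor
    'skip_start': 0,              # steps skipped at the start of an episode
    'batch_size': 64,             # replay minibatch size
    'double_dqn': False,          # plain vs. Double DQN target
    'eps_max': 1.0,               # exploration schedule bounds
    'eps_min': 0.05,
    'learning_rate': 0.001,
}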