import json

import numpy as np

from mini_pacman import PacmanGame


def test(strategy, log_file='test_pacman_log.json'):
    """Replay the fixed test start states with `strategy` and log the full history."""
    with open('test_params.json', 'r') as file:
        read_params = json.load(file)
    game_params = read_params['params']
    test_start_states = read_params['states']
    total_history = []
    total_scores = []
    env = PacmanGame(**game_params)
    for start_state in test_start_states:
        preprocess(start_state)  # normalise the JSON-decoded start state
        episode_history = []
        env.reset()
        env.player = start_state['player']
        env.monsters = start_state['monsters']
        env.diamonds = start_state['diamonds']
        env.walls = start_state['walls']
        assert len(env.monsters) == env.nmonsters
        assert len(env.diamonds) == env.ndiamonds
        assert len(env.walls) == env.nwalls
        obs = env.get_obs()
        episode_history.append(obs)
        while not obs['end_game']:
            action = strategy(obs)
            obs = env.make_action(action)
            episode_history.append(obs)
        total_history.append(episode_history)
        total_scores.append(obs['total_score'])
    mean_score = np.mean(total_scores)
    with open(log_file, 'w') as file:
        json.dump(total_history, file)
    print("Your average score is {}, saved log to '{}'. "
          "Do not forget to upload it for submission!".format(mean_score, log_file))
    return mean_score
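# `preprocess` is called above but not defined in this snippet. A plausible
# minimal sketch, assuming it only needs to turn the JSON-decoded lists back
# into the tuples PacmanGame expects (the body below is an assumption):
def preprocess(state):
    state['player'] = tuple(state['player'])
    state['monsters'] = [tuple(pos) for pos in state['monsters']]
    state['diamonds'] = [tuple(pos) for pos in state['diamonds']]
    state['walls'] = [tuple(pos) for pos in state['walls']]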
import json

from keras.models import Sequential, clone_model
from keras.layers import Dense, InputLayer, Flatten, Conv2D
from keras.optimizers import Adam
from keras.callbacks import CSVLogger, TensorBoard
import keras.backend as K
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = (9, 9)

## READING IN THE CUSTOM PACMAN ENVIRONMENT AND INSTANTIATING IT
from mini_pacman import test, random_strategy, naive_strategy, PacmanGame

with open('minipacman_test/test_params.json', 'r') as file:
    read_params = json.load(file)
game_params = read_params['params']
env = PacmanGame(**game_params)

# CREATING THE VANILLA DQN MODEL FUNCTION - to be used for online & target networks
def create_dqn_model(input_shape, nb_actions, dense_layers, dense_units):
    model = Sequential()
    model.add(InputLayer(input_shape=input_shape))
    for i in range(dense_layers):
        model.add(Dense(units=dense_units, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model

# Compile the online network using the Adam optimizer and a loss of type `mse`.
input_shape = (32,)
nb_actions = 9
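# A minimal sketch of the compile step described above, plus the target-network
# clone it is paired with. The layer counts, units, and learning rate here are
# assumed hyperparameters, not values from the original.
online_network = create_dqn_model(input_shape, nb_actions,
                                  dense_layers=2, dense_units=256)
online_network.compile(optimizer=Adam(lr=0.001), loss='mse')

# The target network starts as an exact copy of the online network's weights.
target_network = clone_model(online_network)
target_network.set_weights(online_network.get_weights())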
import json

from mini_pacman import PacmanGame
from mini_pacman import test, random_strategy, naive_strategy

with open('test_params.json', 'r') as file:
    read_params = json.load(file)
game_params = read_params['params']
env = PacmanGame(**game_params)

test(strategy=naive_strategy, log_file='test_pacman_log_naive.json')
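# The same harness can score the random baseline for comparison; the log file
# name below is an assumption, chosen to mirror the naive-strategy call above.
test(strategy=random_strategy, log_file='test_pacman_log_random.json')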
        v.append(x)
        v.append(y)
    for x, y in obs['diamonds']:
        v.append(x)
        v.append(y)
    for x, y in obs['walls']:
        v.append(x)
        v.append(y)
    return v

#env = gym.make("MsPacman-ram-v0")
env = PacmanGame(field_shape=(8, 8), nmonsters=2, ndiamonds=3, nwalls=10,
                 monster_vision_range=2, max_moves=100,
                 diamond_reward=100, survival_reward=1)
#env = PacmanGame(field_shape=(10, 10), nmonsters=2, ndiamonds=3, nwalls=4, monster_vision_range=1)

obs = env.reset()
end_game = False
score = 0

def create_dqn_model(input_shape, nb_actions, dense_layers, dense_units):
    model = Sequential()
    model.add(InputLayer(input_shape=input_shape))
    for i in range(dense_layers):
        model.add(Dense(units=dense_units, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model
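# DQN training on this environment needs an exploration policy. A minimal
# epsilon-greedy sketch; the helper name and signature are assumptions, not
# part of the original snippet.
import random

import numpy as np

def epsilon_greedy(q_values, epsilon, n_outputs):
    if random.random() < epsilon:
        return random.randrange(n_outputs)  # explore: random action index
    return int(np.argmax(q_values))         # exploit: greedy action index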
# Using a DQN strategy that takes an observation of PacmanGame and returns an action (an integer from 1 to 9).
# Specific game parameters (such as the number of monsters, diamonds, etc.) are stored in test_params.json.
import random
import json

import numpy as np
from keras.models import Sequential, clone_model
from keras.layers import Dense
from keras.optimizers import Adam

# Load the game and its parameters
from mini_pacman import PacmanGame

with open('test_params.json', 'r') as file:
    read_params = json.load(file)
game_params = read_params['params']
env = PacmanGame(**game_params)

# Encode the current observation as a flat vector of features
def get_state(obs):
    v = []
    x, y = obs['player']
    v.append(x)
    v.append(y)
    for x, y in obs['monsters']:
        v.append(x)
        v.append(y)
    for x, y in obs['diamonds']:
        v.append(x)
        v.append(y)
    for x, y in obs['walls']:
        v.append(x)
        v.append(y)
    return v
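# A sketch of the DQN strategy described above, assuming `online_network` is a
# trained Keras model (it is not defined in this snippet). Actions are numbered
# 1..9 while network outputs are indexed 0..8, hence the `a - 1` offset.
def dqn_strategy(obs):
    state = np.array([get_state(obs)])
    q_values = online_network.predict(state)[0]
    # Restrict the greedy choice to the actions the game currently allows.
    return max(obs['possible_actions'], key=lambda a: q_values[a - 1])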
import gc
import json
import time

import pyglet
from keras.models import Sequential, clone_model
from keras.layers import Dense, InputLayer
from keras.optimizers import Adam
from keras.callbacks import CSVLogger, TensorBoard
import keras.backend as K
from tabulate import tabulate

from mini_pacman import PacmanGame

with open('test_params.json', 'r') as file:
    read_params = json.load(file)
game_params = read_params['params']
env = PacmanGame(**game_params)
env.render()

def create_dqn_model(input_shape, nb_actions):
    model = Sequential()
    model.add(Dense(units=1000, input_shape=input_shape, activation='relu'))
    model.add(Dense(units=1000, activation='relu'))
    model.add(Dense(units=1000, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model
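# DQN training built on these imports typically keeps a replay buffer of past
# transitions. A minimal sketch; the capacity and helper name are assumptions.
import random
from collections import deque

replay_memory = deque(maxlen=100000)  # holds (state, action, reward, next_state, done)

def sample_memories(batch_size):
    # Uniformly sample a minibatch of stored transitions.
    return random.sample(replay_memory, batch_size)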
            obs = new_obs
            state = new_state
        scores.append(obs['total_score'])
    return

if __name__ == "__main__":
    FIELD_SHAPE = (4, 4)
    N_MONSTERS = 1
    N_DIAMONDS = 1
    N_WALLS = 0
    MONSTER_VISION_RANGE = 1
    env = PacmanGame(field_shape=FIELD_SHAPE,
                     nmonsters=N_MONSTERS,
                     ndiamonds=N_DIAMONDS,
                     nwalls=N_WALLS,
                     monster_vision_range=MONSTER_VISION_RANGE)

    ### Make n_games lower if you get a MemoryError
    ql = QLearn(gamma=0.95, alpha=0.05)
    train_scores = []  # container for results
    n_games = 100000   # number of games per eps
    eps_list = [0.9, 0.7, 0.5, 0.3, 0.2, 0.1, 0.05, 0.0]
    for eps in eps_list:
        print('Training with eps = {} ...'.format(eps))
        ql.train(eps, n_games, env, training=True, scores=train_scores)
    print('Done.')
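# `QLearn` is not defined in this snippet. A hypothetical sketch of the core
# tabular update such a class implements; only gamma and alpha come from the
# call above, everything else here is an assumption:
#   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
from collections import defaultdict

class QLearn:
    def __init__(self, gamma, alpha):
        self.gamma = gamma
        self.alpha = alpha
        self.Q = defaultdict(float)  # maps (state, action) pairs to values

    def update(self, state, action, reward, new_state, possible_actions):
        best_next = max(self.Q[(new_state, a)] for a in possible_actions)
        target = reward + self.gamma * best_next
        self.Q[(state, action)] += self.alpha * (target - self.Q[(state, action)])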
#import matplotlib.pyplot as plt
import numpy as np
import random
import time

import pyglet

from mini_pacman import PacmanGame

env = PacmanGame(field_shape=(10, 10), nmonsters=2, ndiamonds=3,
                 nwalls=4, monster_vision_range=1)
env.print_field()  # will print a picture in text symbols
#env.render()      # creates graphical rendering of the field

# Render a random-action game
obs = env.reset()
while not obs['end_game']:
    action = random.choice(obs['possible_actions'])
    obs = env.make_action(action)
    env.render()
    time.sleep(0.5)