def main():
    """Train a PrioritizedExperienceLearner on Breakout for 50 episodes.

    After each episode the score is recorded, the learner is told the game
    is over, the best-scoring parameters so far are written to
    ``dqnbest<score>.pkl``, the cost/score figure is redrawn and a timing
    summary is printed.  Every tenth episode also writes ``dqn<episode>.pkl``.
    Once the loop finishes the figure is shown and a final checkpoint is
    saved.
    """
    # configuration
    rom = b'D:\\_code\\_reinforcementlearning\\breakout.bin'
    gamename = 'breakout'  # not read anywhere else in this function
    skip_frame = 4
    num_actions = 4
    n_episodes = 50

    learner = PrioritizedExperienceLearner(skip_frame, num_actions)
    game_handler = GameHandler(rom, False, learner, skip_frame)

    scores = []
    best_reward = -np.inf
    # plt.ion()
    start_time = time.time()

    for episode in range(n_episodes):
        total_reward = game_handler.run_one_game(
            learner, lives=5, life_ram_ind=BREAKOUT, early_return=True)
        scores.append(total_reward)
        learner.game_over()

        # a tie with the current best also counts (>=) and is saved again
        if total_reward >= best_reward:
            learner.save('dqnbest{0}.pkl'.format(total_reward))
            best_reward = total_reward

        # redraw both panels: learner cost on the left, scores on the right
        plt.clf()
        plt.subplot(1, 2, 1)
        plt.plot(learner.get_cost_list(), '.')
        plt.subplot(1, 2, 2)
        plt.plot(np.asarray(scores), '.')
        # plt.pause(0.01)

        # periodic checkpoint (episodes 0, 10, 20, ...)
        if episode % 10 == 0:
            learner.save('dqn{0}.pkl'.format(episode))

        elapsed = time.time() - start_time
        frames = game_handler.total_frame_count
        print("Episode " + str(episode) + " ended with score: " + str(total_reward))
        print('Total Time:', elapsed, 'Frame Count:', frames, 'FPS:', frames / elapsed)

    # plt.ioff()
    plt.show()

    # final save, numbered one past the last episode index
    learner.save('dqn{0}.pkl'.format(episode + 1))
def main():
    """Run a short (2 episode) training session with a per-game target copy.

    Uses the module-level ``rom``, ``learner`` and ``skip_frame``.  Before
    every game the learner copies a new target; afterwards the score is
    recorded, the best-scoring parameters are saved to ``dqnbest<score>.pkl``
    and a timing summary is printed.  Every tenth episode writes
    ``dqn<episode>.pkl``, and a final checkpoint is written after the loop.
    Plotting is disabled in this variant.
    """
    n_episodes = 2

    game_handler = GameHandler(rom, False, learner, skip_frame)
    scores = []
    validLossList = []  # never filled in this function
    best_reward = -np.inf
    start_time = time.time()

    for episode in range(n_episodes):
        # copy a new target learner at the start of each game
        learner.copy_new_target()
        total_reward = game_handler.run_one_game(
            learner, lives=5, life_ram_ind=BREAKOUT, early_return=True)
        scores.append(total_reward)
        learner.game_over()

        # a tie with the current best also counts (>=) and is saved again
        if total_reward >= best_reward:
            learner.save('dqnbest{0}.pkl'.format(total_reward))
            best_reward = total_reward

        # periodic checkpoint (episodes 0, 10, 20, ...)
        if episode % 10 == 0:
            learner.save('dqn{0}.pkl'.format(episode))

        elapsed = time.time() - start_time
        frames = game_handler.total_frame_count
        print("Episode " + str(episode) + " ended with score: " + str(total_reward))
        print('Total Time:', elapsed, 'Frame Count:', frames, 'FPS:', frames / elapsed)

    # final save, numbered one past the last episode index
    learner.save('dqn{0}.pkl'.format(episode + 1))
import time  # required by the time.time() call below

import matplotlib.pyplot as plt
import numpy as np

from learningALE.handlers.gamehandler import GameHandler
from learningALE.learners.DQN import DQNLearner

# setup vars
rom = b'D:\\_code\\breakout.bin'
gamename = 'breakout'
skip_frame = 4
num_actions = 4
learner = DQNLearner(skip_frame, num_actions)
epochs = 100
ep_count = 0

game_handler = GameHandler(rom, False, skip_frame, learner)
scoreList = list()
bestTotReward = -np.inf
plt.ion()
st = time.time()

# keep playing games until total_frame_count / skip_frame reaches epochs * 50000
while game_handler.total_frame_count/skip_frame < epochs*50000:
    total_reward = game_handler.run_one_game(learner)
    scoreList.append(total_reward)
    learner.game_over()

    # if this is the best score save it as such (ties are saved again)
    if total_reward >= bestTotReward:
        learner.save('dqnbest{0}.pkl'.format(total_reward))
        bestTotReward = total_reward
import time
import matplotlib.pyplot as plt
import numpy as np
from learningALE.handlers.gamehandler import GameHandler
from DoubleDQNLearner import DoubleDQNLearner
from learningALE.tools.life_ram_inds import BREAKOUT

# Training script: plays up to 5000 games of Breakout with a DoubleDQNLearner,
# recording each game's total reward.

# setup vars
rom = b'D:\\_code\\_reinforcementlearning\\breakout.bin'
gamename = 'breakout'
skip_frame = 4
num_actions = 4
learner = DoubleDQNLearner(skip_frame, num_actions)

game_handler = GameHandler(rom, False, learner, skip_frame)
scoreList = list()      # total reward of every finished game, in order
validLossList = list()
bestTotReward = -np.inf
plt.ion()
st = time.time()        # wall-clock start of the run
for episode in range(5000):
    learner.copy_new_target()  # copy a new target learner at the start of each game
    total_reward = game_handler.run_one_game(learner, lives=5, life_ram_ind=BREAKOUT)
    scoreList.append(total_reward)
    learner.game_over()

    # if this is the best score save it as such
    if total_reward >= bestTotReward:
        learner.save('dqnbest{0}.pkl'.format(total_reward))
# NOTE(review): the loop that supplies `file` to this statement is outside the
# visible chunk -- confirm its nesting against the full file.
epoch_files.append(file)

# Pair every checkpoint file name with its epoch number: the numeric part of
# 'dqn<number>.pkl' divided by 50000 and rounded to one decimal place.
epoch_dqn = list()
for dqn in epoch_files:
    epoch = dqn.replace('dqn', '')
    epoch = epoch.replace('.pkl', '')
    epoch = round(float(epoch) / 50000, 1)
    epoch_dqn.append([epoch, dqn])
# sorts the [epoch, name] pairs, i.e. by epoch first
epoch_dqn.sort()

# setup vars
rom = b'D:\\_code\\breakout.bin'
skip_frame = 4
num_actions = 4

game_handler = GameHandler(rom, False, skip_frame)

# now test to plot score
num_to_run = 10
means = list()
q_vals = list()
for ep, file in epoch_dqn:
    # os.path.join picks the platform's separator instead of a hard-coded '\\'
    player_file = os.path.join(os.getcwd(), file)
    learner = DQNTester(skip_frame, num_actions, load=player_file)
    game_handler.set_legal_actions(learner)
    rews, times, frames = evaluate_player(learner, game_handler, num_to_run)
    means.append(np.mean(rews))

    # turn infinities into NaN so that nanmean leaves them out of the average
    qval_arr = np.asarray(learner.q_vals)
    qval_arr[np.isinf(qval_arr)] = np.nan
    mean_qvals = np.nanmean(qval_arr, axis=0)
import time
import matplotlib.pyplot as plt
import numpy as np
from learningALE.handlers.gamehandler import GameHandler
from learningALE.learners.DQN import DQNTester

# Plays a single game of Breakout with a saved DQN checkpoint and prints the
# score together with timing statistics.

# setup vars
rom = b'D:\\_code\\breakout.bin'
gamename = 'breakout'
skip_frame = 4
num_actions = 4

# Raw string: the path holds the same characters as before, but no longer
# depends on Python leaving unknown escapes such as '\_' or '\D' untouched
# (a single '\r' in a normal string would have become a carriage return).
learner = DQNTester(skip_frame, num_actions,
                    load=r'D:\_code\learningALE\experiments\reproduction\DQN_Original\dqnbest36.0.pkl',
                    rand_val=0.05)

st = time.time()
game_handler = GameHandler(rom, True, skip_frame, learner)
total_reward = game_handler.run_one_game(learner, clip=False, neg_reward=False)
et = time.time()

print("Episode ended with score: " + str(total_reward))
print('Total Time:', et - st, 'Frame Count:', game_handler.total_frame_count, 'FPS:', game_handler.total_frame_count / (et - st))