def main():
    """Train a PrioritizedExperienceLearner on Breakout for 50 games.

    After each game the score is recorded and ``learner.game_over()`` is
    called.  A ``dqnbest<score>.pkl`` checkpoint is saved whenever the score
    ties or beats the best seen so far, and a ``dqn<episode>.pkl`` checkpoint
    is saved on every tenth episode (0, 10, 20, ...).  The cost and score
    plots are redrawn after every game and timing statistics are printed.
    Once all games are done the figure is shown and a final ``dqn50.pkl``
    checkpoint is written.
    """
    # --- configuration ---
    rom_path = b'D:\\_code\\_reinforcementlearning\\breakout.bin'
    game_label = 'breakout'  # descriptive only; nothing below reads it
    frame_skip = 4
    action_count = 4
    best_ckpt = 'dqnbest{0}.pkl'
    periodic_ckpt = 'dqn{0}.pkl'

    # the learner is built first because the game handler receives it
    agent = PrioritizedExperienceLearner(frame_skip, action_count)
    handler = GameHandler(rom_path, False, agent, frame_skip)
    scores = []
    best_score = -np.inf

    started = time.time()
    for episode in range(50):
        score = handler.run_one_game(agent, lives=5, life_ram_ind=BREAKOUT, early_return=True)
        scores.append(score)
        agent.game_over()

        # ">=" means a tie with the current best also triggers a save
        if score >= best_score:
            agent.save(best_ckpt.format(score))
            best_score = score

        # redraw: learner cost list on the left panel, per-game score on the right
        plt.clf()
        plt.subplot(1, 2, 1)
        plt.plot(agent.get_cost_list(), '.')
        plt.subplot(1, 2, 2)
        plt.plot(np.asarray(scores), '.')

        # periodic checkpoint on episodes 0, 10, 20, ...
        if not episode % 10:
            agent.save(periodic_ckpt.format(episode))

        # timing is cumulative since the start of training, not per game
        elapsed = time.time() - started
        frames = handler.total_frame_count
        print("Episode ", episode, " ended with score: ", score, sep='')
        print('Total Time:', elapsed, 'Frame Count:', frames, 'FPS:', frames / elapsed)

    plt.show()

    # final checkpoint, numbered one past the last episode index
    agent.save(periodic_ckpt.format(episode + 1))
def main():
    """Play two Breakout games with the module-level ``learner``.

    ``rom``, ``learner`` and ``skip_frame`` are read from the enclosing
    module scope; none of them is assigned inside this function.  Before
    each game ``learner.copy_new_target()`` is called.  After each game the
    score is recorded, ``learner.game_over()`` is called, a
    ``dqnbest<score>.pkl`` checkpoint is saved when the score ties or beats
    the best so far, a ``dqn<episode>.pkl`` checkpoint is saved when the
    episode index is a multiple of ten, and timing statistics are printed.
    A final ``dqn2.pkl`` checkpoint is written after the loop.
    """
    best_ckpt = 'dqnbest{0}.pkl'
    periodic_ckpt = 'dqn{0}.pkl'

    handler = GameHandler(rom, False, learner, skip_frame)
    scores = []
    valid_losses = []  # created but never filled in this function
    best_score = -np.inf

    started = time.time()
    for episode in range(2):
        # refresh the target learner at the start of every game
        learner.copy_new_target()
        score = handler.run_one_game(learner, lives=5, life_ram_ind=BREAKOUT, early_return=True)
        scores.append(score)
        learner.game_over()

        # ">=" means a tie with the current best also triggers a save
        if score >= best_score:
            learner.save(best_ckpt.format(score))
            best_score = score

        # (live plotting of cost and score is switched off in this variant)

        # periodic checkpoint when the episode index is a multiple of ten
        if not episode % 10:
            learner.save(periodic_ckpt.format(episode))

        # timing is cumulative since the start of the run, not per game
        elapsed = time.time() - started
        frames = handler.total_frame_count
        print("Episode ", episode, " ended with score: ", score, sep='')
        print('Total Time:', elapsed, 'Frame Count:', frames, 'FPS:', frames / elapsed)

    # final checkpoint, numbered one past the last episode index
    learner.save(periodic_ckpt.format(episode + 1))
import matplotlib.pyplot as plt
import numpy as np
from learningALE.handlers.gamehandler import GameHandler
from learningALE.learners.DQN import DQNLearner


# Train a DQNLearner on Breakout, one game per loop iteration, saving a
# 'dqnbest<score>.pkl' checkpoint whenever a game ties or beats the best score.

# `time` is needed for the start timestamp below but is not brought in by the
# import lines directly above this section, so it is imported here; without
# it the `time.time()` call raises NameError.
import time

# setup vars
rom = b'D:\\_code\\breakout.bin'
gamename = 'breakout'
skip_frame = 4
num_actions = 4
learner = DQNLearner(skip_frame, num_actions)
epochs = 100

ep_count = 0
# NOTE(review): the second positional argument is presumably a display flag —
# confirm against GameHandler's signature.
game_handler = GameHandler(rom, False, skip_frame, learner)
scoreList = list()
bestTotReward = -np.inf
plt.ion()
st = time.time()
# keep playing until total frames / skip_frame reaches epochs * 50000
while game_handler.total_frame_count/skip_frame < epochs*50000:
    total_reward = game_handler.run_one_game(learner)
    scoreList.append(total_reward)

    learner.game_over()

    # if this is the best score save it as such
    # (">=" means a tie with the current best is saved as well)
    if total_reward >= bestTotReward:
        learner.save('dqnbest{0}.pkl'.format(total_reward))
        bestTotReward = total_reward
import time
import matplotlib.pyplot as plt
import numpy as np
from learningALE.handlers.gamehandler import GameHandler
from DoubleDQNLearner import DoubleDQNLearner
from learningALE.tools.life_ram_inds import BREAKOUT


# Train a DoubleDQNLearner on Breakout for 5000 games, saving a
# 'dqnbest<score>.pkl' checkpoint whenever a game ties or beats the best score.

# setup vars
rom = b'D:\\_code\\_reinforcementlearning\\breakout.bin'
gamename = 'breakout'
skip_frame = 4
num_actions = 4
learner = DoubleDQNLearner(skip_frame, num_actions)
game_handler = GameHandler(rom, False, learner, skip_frame)
scoreList = list()
validLossList = list()
bestTotReward = -np.inf

plt.ion()
st = time.time()
for episode in range(5000):
    learner.copy_new_target()  # copy a new target learner at the start of each game
    total_reward = game_handler.run_one_game(learner, lives=5, life_ram_ind=BREAKOUT)
    scoreList.append(total_reward)

    learner.game_over()

    # if this is the best score save it as such
    if total_reward >= bestTotReward:
        learner.save('dqnbest{0}.pkl'.format(total_reward))
        # Record the new best so later games are compared against it.  The
        # branch previously appended to `epoch_files`, which (like `file`) is
        # not defined at this point and raised NameError on the first game.
        bestTotReward = total_reward

# Build [epoch, filename] pairs from the checkpoint filenames in `epoch_files`.
# NOTE(review): `epoch_files` is not assigned anywhere in this section — it
# must already hold the checkpoint filenames; confirm where it is built.
epoch_dqn = list()
for dqn in epoch_files:
    # remove every 'dqn' and '.pkl' substring, leaving the numeric part
    epoch = dqn.replace('dqn', '')
    epoch = epoch.replace('.pkl', '')
    # numeric part / 50000, rounded to one decimal place
    # (presumably a step count converted to epochs — confirm)
    epoch = round(float(epoch) / 50000, 1)
    epoch_dqn.append([epoch, dqn])

# sort by epoch value; equal epochs fall back to comparing the filename
epoch_dqn.sort()

# setup vars
rom = b'D:\\_code\\breakout.bin'
skip_frame = 4
num_actions = 4
# no learner is passed here; each checkpoint's learner is attached in the loop
# below through set_legal_actions
game_handler = GameHandler(rom, False, skip_frame)

# now test to plot score
num_to_run = 10
means = list()
q_vals = list()
for ep, file in epoch_dqn:
    # checkpoint path = current working directory + backslash + filename
    # NOTE(review): `os` is not imported in the visible source — confirm.
    player_file = os.getcwd() + '\\' + file
    # NOTE(review): `DQNTester` is only imported further down in the visible
    # source, and `evaluate_player` is not defined in it — confirm both are
    # in scope before this loop runs.
    learner = DQNTester(skip_frame, num_actions, load=player_file)
    game_handler.set_legal_actions(learner)

    rews, times, frames = evaluate_player(learner, game_handler, num_to_run)
    # one mean reward per checkpoint
    means.append(np.mean(rews))
    qval_arr = np.asarray(learner.q_vals)
    # turn infinities into NaN so nanmean skips them
    qval_arr[np.isinf(qval_arr)] = np.nan
    # NaN-ignoring mean along axis 0; not used in the lines shown here
    # (presumably consumed, together with `q_vals`, by code that follows)
    mean_qvals = np.nanmean(qval_arr, axis=0)
import time
import matplotlib.pyplot as plt
import numpy as np
from learningALE.handlers.gamehandler import GameHandler
from learningALE.learners.DQN import DQNTester


# Load a saved DQN checkpoint into a DQNTester, play a single Breakout game
# with it, and print the score together with timing statistics.

# setup vars
rom = b'D:\\_code\\breakout.bin'
gamename = 'breakout'
skip_frame = 4
num_actions = 4
# The checkpoint path is a raw string: the earlier non-raw literal relied on
# unrecognised escapes (\_, \l, \e, \D, \d), which Python only tolerates with
# a Deprecation/SyntaxWarning.  The runtime value of the path is unchanged.
learner = DQNTester(skip_frame, num_actions,
                    load=r'D:\_code\learningALE\experiments\reproduction\DQN_Original\dqnbest36.0.pkl',
                    rand_val=0.05)

st = time.time()
# NOTE(review): the second positional argument is presumably a display flag
# (True here) — confirm against GameHandler's signature.
game_handler = GameHandler(rom, True, skip_frame, learner)
total_reward = game_handler.run_one_game(learner, clip=False, neg_reward=False)
et = time.time()
print("Episode ended with score: " + str(total_reward))
# elapsed time covers GameHandler construction as well as the game itself
print('Total Time:', et - st, 'Frame Count:', game_handler.total_frame_count, 'FPS:', game_handler.total_frame_count / (et - st))