Example #1
import numpy as np
from scipy.misc import imresize  # note: imresize was removed in SciPy 1.3
from learningALE.libs.ale_python_interface import ALEInterface


class MinimalGameHandler:
    """
    The :class:`MinimalGameHandler` class handles the interface to the ALE and nothing else. It is meant for
    advanced users who need fine control over every aspect of the process. Most of its methods are thin wrappers
    around the underlying ALE calls, with Pythonic names and usage.

    Parameters
    ----------
    rom : byte string
        Path of the ROM file to load. Must be a byte string: b'dir_for_rom/rom.bin'
    frame_skip : int
        Default 4. Number of frames to skip between actions. This sets the frame skip on the Stella emulator
    show_rom : boolean
        Default False. Whether or not to display the game. True takes longer to run but can be fun to watch
    """
    def __init__(self, rom, frame_skip=4, show_rom=False):
        # set up emulator
        self.ale = ALEInterface(show_rom)
        self.ale.setInt(b'frame_skip', frame_skip)
        self.ale.loadROM(rom)

        # set up the gamescreen buffer; reusing one array avoids reallocating an empty screen every frame
        width, height = self.ale.getScreenDims()
        self.gamescreen = np.empty((height, width, 1), dtype=np.uint8)

    def reset(self):
        self.ale.reset_game()

    def step(self, action, clip=None):
        reward = 0
        if clip is not None:
            reward += np.clip(self.ale.act(action), 0, clip)
        else:
            reward += self.ale.act(action)
        return reward

    def get_gamescreen(self, converted=True):
        self.gamescreen = self.ale.getScreenGrayscale(self.gamescreen)

        if converted:
            # convert ALE gamescreen into 84x84 image
            processedImg = imresize(self.gamescreen[33:-16, :, 0], 0.525, interp='nearest')
            return processedImg
        else:
            # return a copy: the converted branch also returns a copy, and copies are easier to store for experience replay
            return np.copy(self.gamescreen)

    def get_game_over(self):
        return self.ale.game_over()

    def get_legal_actions(self):
        return self.ale.getMinimalActionSet()
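
A minimal usage sketch for the class above; the ROM path is illustrative, any valid Atari ROM byte-string path works:

import numpy as np

handler = MinimalGameHandler(b'roms/breakout.bin', frame_skip=4, show_rom=False)
handler.reset()
legal_actions = handler.get_legal_actions()
total_reward = 0
while not handler.get_game_over():
    action = legal_actions[np.random.randint(legal_actions.size)]
    total_reward += handler.step(action, clip=1)  # clip positive rewards to 1
    frame = handler.get_gamescreen()              # converted 84x84 uint8 image
print('total reward:', total_reward)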
Example #2
def main():
    import time
    import numpy as np
    from learningALE.libs.ale_python_interface import ALEInterface

    # this script is used to find which RAM index stores the number of lives for a game

    ale = ALEInterface(True)

    ale.loadROM(b'D:\\_code\\beam_rider.bin')

    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getLegalActionSet()

    frameCount = 0
    ramlist = list()
    st = time.time()
    for episode in range(1):
        total_reward = 0.0
        while not ale.game_over():
            a = legal_actions[np.random.randint(legal_actions.size)]
            ram = ale.getRAM()
            ramlist.append(ram)
            reward = ale.act(a)
            total_reward += reward
            frameCount += 1
        print("Episode " + str(episode) + " ended with score: " +
              str(total_reward))
        ale.reset_game()
    et = time.time()
    print(et - st, frameCount / (et - st))

    import matplotlib.pyplot as plt
    ramarray = np.asarray(ramlist)
    # zero out any RAM column that ever exceeds the starting number of lives;
    # a lives counter should stay small for the whole episode
    w = np.where(ramarray > 3)[1]
    ramarray[:, w] = 0
    plt.plot(ramarray)

    # the columns that remain non-zero are the candidate lives counters
    notZ = np.where(ramarray != 0)[1]
    unqNZ = np.unique(notZ)
    print(unqNZ)
    plt.show()
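
The scan above can also be written as a small helper. This is a hypothetical tidier variant of the same idea: a lives counter should stay small for the whole episode and should change as lives are lost:

import numpy as np

def candidate_life_indices(ram_history, max_lives=3):
    ram = np.asarray(ram_history)            # shape: (timesteps, 128)
    small = (ram <= max_lives).all(axis=0)   # value never exceeds the starting lives
    changed = ram[0] != ram[-1]              # value changed during the episode
    return np.where(small & changed)[0]

# e.g. inside main(): print(candidate_life_indices(ramlist))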
Example #4
def main():
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    lives = 5

    # set up vars
    skipFrame = 4

    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2), legal_actions)
    scoreList = list()

    cnn = AlloEggoCnn()
    with open(os.getcwd() + '\\saves\\cnnbestalloego.pkl', 'rb') as fin:
        parms = pickle.load(fin)

    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for frame in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                processedImg = np.asarray(
                    imresize(gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0], 0.5, interp='nearest'),
                    dtype=dtype) / 255

                frames.append(processedImg)

                performedAction, actionInd = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                if rew > 0:
                    reward += 1

                # RAM index 57 holds the lives counter in Breakout; treat a lost life as a -1 reward
                ram = ale.getRAM()
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)

            actionVect = cnn.get_output(frames.reshape((1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)

            total_reward += reward
            frameCount += 1 * skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)

        lives = 5

        print("Episode " + str(episode) + " ended with score: " +
              str(total_reward))

        et = time.time()
        print('Total Time:', et - st, 'Frame Count:', frameCount, 'FPS:',
              frameCount / (et - st))

    plt.clf()
    plt.plot(scoreList, '.')
    plt.pause(0.01)
    plt.ioff()
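
The frame preprocessing inside the loop above reduces the 210x160 RGBA screen buffer to a single 105x80 channel. As a standalone sketch, using the same era SciPy API (imresize was removed in SciPy 1.3):

import numpy as np
from scipy.misc import imresize

def preprocess(gamescreen, screen_height=210, screen_width=160, dtype=np.float16):
    # take one colour channel and halve both dimensions: 210x160 -> 105x80
    gray = gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0]
    return np.asarray(imresize(gray, 0.5, interp='nearest'), dtype=dtype) / 255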
Example #5
# This snippet is an excerpt; the imports and setup it relies on are
# reconstructed here from how the names are used below.
import copy
import os
import pickle
import time

import numpy as np
import pygame
from learningALE.libs.ale_python_interface import ALEInterface

# per-episode buffers and the full recorded dataset
curractions, currrewards, currstates = list(), list(), list()
actions, rewards, states = list(), list(), list()

# pygame window/clock for input handling and frame limiting
pygame.init()
clock = pygame.time.Clock()

# start up the python ale interface
ale = ALEInterface()

# load a rom
ale.loadROM(b'd:\\_code\\_reinforcementlearning\\breakout.bin')

# screen dimensions and legal actions
(screen_width, screen_height) = ale.getScreenDims()
legal_actions = ale.getLegalActionSet()

frameCount = 0
st = time.time()
for episode in range(1):
    total_reward = 0.0
    while not ale.game_over():
        # get a random action
        a = legal_actions[np.random.randint(legal_actions.size)]

        # get the gamescreen and convert it to a usable format (Height x Width)
        gamescreen = ale.getScreenRGB()
        gamescreen = np.asarray(gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0], dtype=float)

        # get ram
        ram = ale.getRAM()

        # take the action and get the reward
        reward = ale.act(a)
        total_reward += reward
        frameCount += 1

        # record the transition (these appends are reconstructed; the excerpt
        # clears the buffers below but never filled them)
        curractions.append(a)
        currrewards.append(reward)
        currstates.append(gamescreen)
    # process the pygame event queue: quit on window close or the 'q' key
    exit = False
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            exit = True
            break
    pressed = pygame.key.get_pressed()  # reconstructed; 'pressed' was undefined in the excerpt
    if pressed[pygame.K_q]:
        exit = True
    if exit:
        break

    # limit the loop to 30 fps
    clock.tick(30)

    if ale.game_over():
        episode_frame_number = ale.getEpisodeFrameNumber()
        frame_number = ale.getFrameNumber()
        print("Frame Number: " + str(frame_number) + " Episode Frame Number: " + str(episode_frame_number))
        print("Episode " + str(episode) + " ended with score: " + str(total_reward))
        ale.reset_game()
        total_reward = 0.0
        episode = episode + 1
        actions.append(copy.deepcopy(np.asarray(curractions, dtype=np.int8)))
        rewards.append(copy.deepcopy(np.asarray(currrewards, dtype=np.int8)))
        states.append(copy.deepcopy(np.asarray(currstates, dtype=np.bool)))
        curractions.clear()
        currrewards.clear()
        currstates.clear()
with open(os.getcwd() + '\\datasets\\spc_inv_dataset1q.pkl', 'wb') as outFile:
    pickle.dump((states, actions, rewards), outFile)
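
Reading the dataset back mirrors the dump above; each list entry holds one episode's states, actions and rewards:

import os
import pickle

with open(os.getcwd() + '\\datasets\\spc_inv_dataset1q.pkl', 'rb') as inFile:
    states, actions, rewards = pickle.load(inFile)
print(len(states), 'episodes recorded')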
Example #7
import numpy as np
from scipy.misc import imresize  # note: imresize was removed in SciPy 1.3
from learningALE.libs.ale_python_interface import ALEInterface


class GameHandler:
    """
    The :class:`GameHandler` class takes care of the interface between the ALE and the learner.

    Currently supported are the ability to display the ALE screen while running and to skip a fixed number of frames
    by repeating the last chosen action.

    Parameters
    ----------
    rom : byte string
        Path of the ROM file to load. Must be a byte string: b'dir_for_rom/rom.bin'
    show_rom : boolean
        Whether or not to display the game being played. True takes longer to run but can be fun to watch
    skip_frame : int
        Number of frames to repeat the last chosen action for
    learner : :class:`learners.learner`
        Default None. On construction GameHandler will call the learner's set_legal_actions. If None,
        set_legal_actions must be called manually
    """

    def __init__(self, rom, show_rom, skip_frame, learner=None):
        # set up emulator
        self.ale = ALEInterface(show_rom)
        self.ale.loadROM(rom)
        (self.screen_width, self.screen_height) = self.ale.getScreenDims()
        legal_actions = self.ale.getMinimalActionSet()

        # set up vars
        self.skipFrame = skip_frame

        if learner:
            learner.set_legal_actions(legal_actions)

        self.total_frame_count = 0

    def run_one_game(self, learner, neg_reward=False, early_return=False, clip=True, max_episode_frame=np.inf):
        """
        Runs a game until ale.game_over() is true or max_episode_frame is reached.

        Parameters
        ----------
        learner : :class:`learners.learner`
            Will call get_game_action and frames_processed.
            get_game_action must return a valid ALE action ind. frames_processed can be a pass.

        neg_reward : bool
            Default False. Whether or not to use negative rewards, received when the agent loses a life.

        early_return : bool
            Default False. If true and neg_reward is set, the game returns on the first loss of life

        clip : bool
            Default True. Whether or not to clip positive rewards to 1

        max_episode_frame : int
            Default np.inf. The maximum number of frames to run per episode

        Returns
        -------
        float
            Total reward from the game. Can be negative if neg_reward is true.
        """
        total_reward = 0.0
        gamescreen = None
        self.ale.reset_game()
        cur_lives = self.ale.lives()
        action_to_perform = 0  # initially set at zero because we start the game before asking the learner
        while not self.ale.game_over() and self.ale.getEpisodeFrameNumber() < max_episode_frame:
            # get frames
            frames = list()
            reward = 0

            # loop over skip frame
            for frame in range(self.skipFrame):
                gamescreen = self.ale.getScreenGrayscale(gamescreen)

                # crop the score and border rows, then resize the gamescreen into an 84x84 image
                processedImg = imresize(gamescreen[33:-16, :, 0], 0.525, interp='nearest')
                frames.append(processedImg)

                # act on the action to perform, should be ALE compatible action ind
                rew = self.ale.act(action_to_perform)

                # clip positive rewards to 1
                if rew > 0 and clip:
                    reward += 1
                else:
                    reward += rew

                # if allowing negative rewards, see if lives has decreased
                if neg_reward:
                    new_lives = self.ale.lives()
                    if new_lives < cur_lives:
                        reward -= 1  # losing a life is a negative 1 reward
                        cur_lives = new_lives

            # end frame skip loop

            total_reward += reward
            frames = np.asarray(frames)

            # frames_processed must be here before action_to_perform gets overwritten.
            learner.frames_processed(frames, action_to_perform, reward)

            action_to_perform = learner.get_game_action()

            self.total_frame_count += self.skipFrame

            # if doing early return, end game on first loss of life
            if reward < 0 and early_return:
                return total_reward

        # end of game
        return total_reward

    def set_legal_actions(self, learner):
        learner.set_legal_actions(self.ale.getMinimalActionSet())

    def get_legal_actions(self):
        return self.ale.getMinimalActionSet()
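
A usage sketch for GameHandler; the learner below is a hypothetical stub that satisfies the contract described in the docstrings (set_legal_actions, frames_processed and get_game_action):

import numpy as np

class RandomLearner:
    def set_legal_actions(self, legal_actions):
        self.legal_actions = np.asarray(legal_actions)

    def frames_processed(self, frames, action_performed, reward):
        pass  # a real learner would store the transition / train here

    def get_game_action(self):
        return np.random.choice(self.legal_actions)

learner = RandomLearner()
handler = GameHandler(b'roms/breakout.bin', False, 4, learner)  # ROM path is illustrative
score = handler.run_one_game(learner)
print('episode reward:', score)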
Example #8
def main():
    import os
    import pickle

    import matplotlib.pyplot as plt
    from learningALE.learners.nns import CNN
    from scipy.misc import imresize

    from learningALE.handlers.actionhandler import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface
    import lasagne
    import numpy as np

    # plt.ion()
    skipFrame = 3
    cnn = CNN((None, skipFrame, 86, 80), 6, .1, stride=(4, 2))
    with open(os.getcwd() + '\\datasets\\spccnn.pkl', 'rb') as infile:
        parms = pickle.load(infile)
        lasagne.layers.set_all_param_values(cnn.l_out, parms)

    # rom = b'D:\\_code\\breakout.bin'
    rom = b'D:\\_code\\space_invaders.bin'

    ale = ALEInterface(True)
    ale.loadROM(rom)
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    # get labels
    labels = ['noop', 'fire', 'up', 'right', 'left', 'down', 'upright', 'upleft', 'downright', 'downleft',
              'upfire', 'rightfire', 'leftfire', 'downfire', 'uprightfire', 'upleftfire', 'downrightfire',
              'downleftfire']
    labels = np.asarray(labels)[legal_actions]

    # set up vars
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (.1, .1, 2), legal_actions)
    rewList = list()
    for ep in range(100):
        total_reward = 0.0
        trainCount = 0
        ale.reset_game()
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for frame in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # crop the score rows, set any lit pixel to full intensity,
                # then halve the image and scale it into [0, 1]
                processedImg = np.asarray(
                    gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[25:-12, :, 0],
                    dtype=np.float32)
                processedImg[processedImg > 1] = 255
                processedImg = imresize(processedImg, 0.5, interp='nearest') / 255
                frames.append(processedImg)

                performedAction, actionInd = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                if rew > 0:
                    rew = 1
                reward += rew
            total_reward += reward
            frames = np.asarray(frames, dtype=np.float32)

            actionVect = cnn.get_output(frames.reshape((1, skipFrame, frames.shape[1], 80)))[0]
            actionHandler.setAction(actionVect)
            # hid1_act = cnn.get_hid1_act(frames.reshape((1, skip_frame, frames.shape[1], 80)))
            # hid2_act = cnn.get_hid2_act(frames.reshape((1, skip_frame, frames.shape[1], 80)))
            # for x in range(hid1_act.shape[1]):
            #     plt.subplot(4,4,x+1)
            #     plt.imshow(hid1_act[0,x], cmap=plt.cm.gray)
            # for x in range(hid2_act.shape[1]):
            #     plt.subplot(6,6,x+1)
            #     plt.imshow(hid2_act[0,x], cmap=plt.cm.gray)
            # plt.show()
            # plt.clf()
            # plt.plot(actionVect)
            # plt.xticks(range(len(labels)), labels)
            # plt.pause(0.001)
        rewList.append(total_reward)
        print(ep, total_reward)

    print(np.mean(rewList), np.std(rewList), np.max(rewList), np.min(rewList))
    print(np.unique(rewList, return_counts=True))
    plt.plot(rewList)
    plt.show()
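
The ActionHandler internals are not shown in these examples; a minimal epsilon-greedy pick over an action-value vector (the kind of vector setAction receives above) could look like this sketch:

import numpy as np

def e_greedy_action(action_values, legal_actions, epsilon=0.1, rng=np.random):
    if rng.rand() < epsilon:
        return rng.choice(legal_actions)                  # explore
    return legal_actions[int(np.argmax(action_values))]   # exploit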