Example #1
    def __init__(self, rom, frame_skip=4, show_rom=False):
        # set up emulator
        self.ale = ALEInterface(show_rom)
        self.ale.setInt(b'frame_skip', frame_skip)
        self.ale.loadROM(rom)

        # preallocate the game screen buffer; reusing one array should be faster
        # than creating an empty one on every frame
        width, height = self.ale.getScreenDims()
        self.gamescreen = np.empty((height, width, 1), dtype=np.uint8)
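If the bundled ale_python_interface exposes getScreenGrayscale with an output-buffer parameter (an assumption; the buffer shape `(height, width, 1)` of uint8 matches that signature, but check the library), the preallocated array can be filled in place. A minimal companion-method sketch, not part of the original class:

    def get_gamescreen(self):
        # fill the preallocated buffer in place instead of allocating a new array per frame
        self.ale.getScreenGrayscale(self.gamescreen)
        return self.gamescreen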
Example #2
    def __init__(self, rom, show_rom, skip_frame, learner=None):
        # set up emulator
        self.ale = ALEInterface(show_rom)
        self.ale.loadROM(rom)
        (self.screen_width, self.screen_height) = self.ale.getScreenDims()
        legal_actions = self.ale.getMinimalActionSet()

        # set up vars
        self.skipFrame = skip_frame

        if learner:
            learner.set_legal_actions(legal_actions)

        self.total_frame_count = 0
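The learner hook only requires a set_legal_actions method before play starts. A toy stand-in to make that contract concrete (RandomLearner is illustrative, not a class from learningALE, and it assumes numpy is imported as np):

class RandomLearner:
    def set_legal_actions(self, legal_actions):
        # remember which actions the loaded ROM accepts
        self.legal_actions = legal_actions

    def get_action(self):
        # pick uniformly among the legal actions
        return self.legal_actions[np.random.randint(len(self.legal_actions))]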
Example #3
def main():
    import time
    import numpy as np
    from learningALE.libs.ale_python_interface import ALEInterface

    # this script tries to find which RAM index stores the game's number of lives

    ale = ALEInterface(True)

    ale.loadROM(b'D:\\_code\\beam_rider.bin')

    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getLegalActionSet()

    frameCount = 0
    ramlist = list()
    st = time.time()
    for episode in range(1):
        total_reward = 0.0
        while not ale.game_over():
            a = legal_actions[np.random.randint(legal_actions.size)]
            ram = ale.getRAM()
            ramlist.append(ram)
            reward = ale.act(a)
            total_reward += reward
            frameCount += 1
        print("Episode " + str(episode) + " ended with score: " +
              str(total_reward))
        ale.reset_game()
    et = time.time()
    print(et - st, frameCount / (et - st))

    import matplotlib.pyplot as plt
    ramarray = np.asarray(ramlist)

    # a lives counter stays small, so zero out every RAM cell that ever exceeds 3
    w = np.where(ramarray > 3)[1]
    ramarray[:, w] = 0
    plt.plot(ramarray)

    # the columns that remain nonzero are the candidate lives-counter locations
    notZ = np.where(ramarray != 0)[1]
    unqNZ = np.unique(notZ)
    print(unqNZ)
    plt.show()
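The printed indices are only candidates. One way to narrow them down is to replay an episode and watch just those cells; the lives counter is the one that starts at the game's life count and ticks down by one at each death. A sketch reusing the calls from the scan above (candidates is whatever unqNZ printed):

def watch_candidates(ale, legal_actions, candidates):
    # print the candidate RAM cells whenever any of them changes
    last = None
    while not ale.game_over():
        a = legal_actions[np.random.randint(legal_actions.size)]
        ale.act(a)
        vals = ale.getRAM()[candidates]
        if last is None or (vals != last).any():
            print(vals)
            last = vals.copy()
    ale.reset_game()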
Example #4
def main():
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    lives = 5

    # set up vars
    skipFrame = 4

    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2),
                                  legal_actions)
    scoreList = list()

    cnn = AlloEggoCnn()
    with open(os.getcwd() + '\\saves\\cnnbestalloego.pkl', 'rb') as fin:
        parms = pickle.load(fin)

    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for frame in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # the old interface returns a flat RGBA buffer: take the red channel,
                # downsample by half, and scale to [0, 1]
                red = gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0]
                processedImg = np.asarray(imresize(red, 0.5, interp='nearest'), dtype=dtype) / 255

                frames.append(processedImg)

                performedAction, actionInd = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                if rew > 0:
                    reward += 1

                ram = ale.getRAM()
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)

            actionVect = cnn.get_output(frames.reshape(
                (1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)

            total_reward += reward
            frameCount += skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)

        lives = 5

        print("Episode " + str(episode) + " ended with score: " +
              str(total_reward))

        et = time.time()
        print('Total Time:', et - st, 'Frame Count:', frameCount, 'FPS:',
              frameCount / (et - st))

    plt.clf()
    plt.plot(scoreList, '.')
    plt.pause(0.01)
    plt.ioff()
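The channel-extract/downsample/scale sequence inside the frame loop recurs across these examples and is worth pulling into a helper. A sketch under the same assumptions as the example above (flat RGBA screen buffer, and scipy.misc.imresize, which newer SciPy releases have removed; it mirrors the original dependency rather than recommending it):

def preprocess(gamescreen, screen_height, screen_width, dtype=np.float16):
    # red channel of the flat RGBA buffer, downsampled by half, scaled to [0, 1]
    red = gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, 0]
    return np.asarray(imresize(red, 0.5, interp='nearest'), dtype=dtype) / 255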
Example #5
__author__ = 'Ben'
import time

import numpy as np

from learningALE.libs.ale_python_interface import ALEInterface
"""
This example is meant for those wanting to play around with GameHandler, or implement their own ALE interface.
For people that want a plug and play interface use :class:`handlers.GameHandler`
"""

# start up the python ale interface
ale = ALEInterface()

# load a rom
ale.loadROM(b'd:\\_code\\_reinforcementlearning\\breakout.bin')

# screen dimensions and legal actions
(screen_width, screen_height) = ale.getScreenDims()
legal_actions = ale.getLegalActionSet()

frameCount = 0
st = time.time()
for episode in range(1):
    total_reward = 0.0
    while not ale.game_over():
        # get a random action
        a = legal_actions[np.random.randint(legal_actions.size)]

        # get the game screen and convert it to a usable format (Height x Width x Channels)
        gamescreen = ale.getScreenRGB()
        gamescreen = gamescreen.view(np.uint8).reshape(screen_height, screen_width, 4)

        # apply the action and accumulate the reward
        reward = ale.act(a)
        total_reward += reward
        frameCount += 1

    print("Episode " + str(episode) + " ended with score: " + str(total_reward))
    ale.reset_game()
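For comparison, the plug-and-play route the docstring recommends would look roughly like this. This is a sketch only: the constructor signature is taken from Example #2, the import path is assumed, and run_episode is a hypothetical method name since the real GameHandler API is not shown in these excerpts:

from learningALE.handlers import GameHandler  # assumed import path

learner = RandomLearner()  # any object exposing set_legal_actions, as in Example #2
handler = GameHandler(b'd:\\_code\\breakout.bin', show_rom=False, skip_frame=4, learner=learner)
# handler.run_episode()  # hypothetical entry point; check the library for the real name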
Example #6
    10,  #10011 fire up/down (invalid)
    12,  #10100 fire left
    15,  #10101 fire up/left
    17,  #10110 fire down/left
    15,  #10111 fire up/down/left (invalid)
    11,  #11000 fire right
    14,  #11001 fire up/right
    16,  #11010 fire down/right
    14,  #11011 fire up/down/right (invalid)
    11,  #11100 fire left/right (invalid)
    14,  #11101 fire left/right/up (invalid)
    16,  #11110 fire left/right/down (invalid)
    14  #11111 fire up/down/left/right (invalid)
)

ale = ALEInterface(False)
rom = b'D:\\_code\\montezuma_revenge.bin'

ale.loadROM(rom)
legal_actions = ale.getMinimalActionSet()
print(legal_actions)

(screen_width, screen_height) = ale.getScreenDims()
print("width/height: " + str(screen_width) + "/" + str(screen_height))

(display_width, display_height) = (1024, 420)

#init pygame
pygame.init()
screen = pygame.display.set_mode((display_width, display_height))
pygame.display.set_caption("Arcade Learning Environment Player Agent Display")
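The snippet ends right after the window is created. Below is a minimal continuation sketch, with a random agent standing in for the keyboard handling that the omitted part of this example implements; it assumes numpy is imported as np, and that the flat RGBA screen can be reshaped as in the other examples (the channel order may need adjusting on a real build):

clock = pygame.time.Clock()
while not ale.game_over():
    a = legal_actions[np.random.randint(legal_actions.size)]
    ale.act(a)

    # flat RGBA buffer -> (H, W, 3) RGB, then transpose to the (W, H, 3) pygame expects
    rgb = ale.getScreenRGB().view(np.uint8).reshape(screen_height, screen_width, 4)[:, :, :3]
    surf = pygame.surfarray.make_surface(np.ascontiguousarray(np.transpose(rgb, (1, 0, 2))))
    screen.blit(pygame.transform.scale(surf, (display_width, display_height)), (0, 0))
    pygame.display.flip()
    clock.tick(60)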
Example #7
def main():
    import os
    import pickle

    import matplotlib.pyplot as plt
    from learningALE.learners.nns import CNN
    from scipy.misc import imresize

    from learningALE.handlers.actionhandler import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface
    import lasagne
    import numpy as np

    # plt.ion()
    skipFrame = 3
    cnn = CNN((None, skipFrame, 86, 80), 6, .1, stride=(4, 2))
    with open(os.getcwd() + '\\datasets\\spccnn.pkl', 'rb') as infile:
        parms = pickle.load(infile)
        lasagne.layers.set_all_param_values(cnn.l_out, parms)

    # rom = b'D:\\_code\\breakout.bin'
    rom = b'D:\\_code\\space_invaders.bin'

    ale = ALEInterface(True)
    ale.loadROM(rom)
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    # get labels
    labels = [
        'noop', 'fire', 'up', 'right', 'left', 'down', 'upright', 'upleft',
        'downright', 'downleft', 'upfire', 'rightfire', 'leftfire', 'downfire',
        'uprightfire', 'upleftfire', 'downrightfire', 'downleftfire'
    ]
    labels = np.asarray(labels)[legal_actions]

    # set up vars
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (.1, .1, 2),
                                  legal_actions)
    rewList = list()
    for ep in range(100):
        total_reward = 0.0
        trainCount = 0
        ale.reset_game()
        while not ale.game_over():
            # get frames
            frames = list()
            reward = 0
            for frame in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # crop the HUD rows (top 25 and bottom 12) and keep the red channel
                # of the flat RGBA buffer
                processedImg = np.asarray(gamescreen.view(np.uint8).reshape(
                    screen_height, screen_width, 4)[25:-12, :, 0],
                                          dtype=np.float32)
                # effectively binarize: any pixel above 1 goes to full intensity
                processedImg[processedImg > 1] = 255
                processedImg = imresize(processedImg, 0.5, interp='nearest') / 255
                frames.append(processedImg)

                performedAction, actionInd = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                if rew > 0:
                    rew = 1
                reward += rew
            total_reward += reward
            frames = np.asarray(frames, dtype=np.float32)

            actionVect = cnn.get_output(
                frames.reshape((1, skipFrame, frames.shape[1], 80)))[0]
            actionHandler.setAction(actionVect)
            # hid1_act = cnn.get_hid1_act(frames.reshape((1, skip_frame, frames.shape[1], 80)))
            # hid2_act = cnn.get_hid2_act(frames.reshape((1, skip_frame, frames.shape[1], 80)))
            # for x in range(hid1_act.shape[1]):
            #     plt.subplot(4,4,x+1)
            #     plt.imshow(hid1_act[0,x], cmap=plt.cm.gray)
            # for x in range(hid2_act.shape[1]):
            #     plt.subplot(6,6,x+1)
            #     plt.imshow(hid2_act[0,x], cmap=plt.cm.gray)
            # plt.show()
            # plt.clf()
            # plt.plot(actionVect)
            # plt.xticks(range(len(labels)), labels)
            # plt.pause(0.001)
        rewList.append(total_reward)
        print(ep, total_reward)

    print(np.mean(rewList), np.std(rewList), np.max(rewList), np.min(rewList))
    print(np.unique(rewList, return_counts=True))
    plt.plot(rewList)
    plt.show()