Ejemplo n.º 1
0
def main():
    """Play 100 Breakout episodes with an AlloEggoCnn and plot the scores.

    Network parameters are read from ``<cwd>/saves/cnnbestalloego.pkl`` and
    installed into ``cnn.a_out``.  Every decision step gathers ``skipFrame``
    down-scaled screens while repeating the handler's last action, feeds the
    stack to the network and hands the network output back to the
    ``ActionHandler``.  Per-episode score and throughput are printed, and
    the scores are plotted once after the last episode.
    """
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    # NOTE(review): scipy.misc.imresize appears to be missing from recent
    # SciPy releases -- confirm the pinned SciPy version still provides it.
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    start_lives = 5
    lives = start_lives

    # set up vars
    skipFrame = 4

    # NOTE(review): the tuple presumably configures the e-greedy schedule
    # (see ActionHandler) -- confirm the meaning of each entry.
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2),
                                  legal_actions)
    scoreList = []

    cnn = AlloEggoCnn()
    # Build the path from components instead of a '\s...' literal, which
    # only worked because unknown escape sequences are currently tolerated.
    weights_path = os.path.join(os.getcwd(), 'saves', 'cnnbestalloego.pkl')
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(weights_path, 'rb') as fin:
        parms = pickle.load(fin)

    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = []
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # View the screen buffer as bytes, four per pixel, and keep
                # only the first byte of every pixel.
                channel = gamescreen.view(np.uint8).reshape(
                    screen_height, screen_width, 4)[:, :, 0]
                # Halve the resolution and scale the values into [0, 1].
                processedImg = np.asarray(
                    imresize(channel, 0.5, interp='nearest'),
                    dtype=dtype) / 255
                frames.append(processedImg)

                performedAction, _actionInd = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                # Any positive emulator reward counts as exactly +1.
                if rew > 0:
                    reward += 1

                ram = ale.getRAM()
                # A change in RAM byte 57 is penalised with -1
                # (presumably the lives counter -- TODO confirm).
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)

            actionVect = cnn.get_output(frames.reshape(
                (1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)

            total_reward += reward
            frameCount += skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)

        lives = start_lives

        print("Episode " + str(episode) + " ended with score: " +
              str(total_reward))

        et = time.time()
        print('Total Time:', et - st, 'Frame Count:', frameCount, 'FPS:',
              frameCount / (et - st))

    plt.clf()
    plt.plot(scoreList, '.')
    plt.pause(0.01)
    plt.ioff()
Ejemplo n.º 2
0
# Random-agent demo: plays one episode choosing a uniformly random legal
# action on every emulator step.
# NOTE(review): relies on `ale`, `np` and `time` being set up before this
# snippet -- they are not defined in the visible part of the file.
# Backslashes are escaped explicitly; the bytes are the same as before, but
# the literal no longer depends on the invalid escape sequence '\_'.
ale.loadROM(b'd:\\_code\\_reinforcementlearning\\breakout.bin')

# screen dimensions and legal actions
(screen_width, screen_height) = ale.getScreenDims()
legal_actions = ale.getLegalActionSet()

frameCount = 0
st = time.time()
for episode in range(1):
    total_reward = 0.0
    while not ale.game_over():
        # get a random action
        a = legal_actions[np.random.randint(legal_actions.size)]

        # get gamescreen: view the buffer as bytes, four per pixel, and keep
        # the first byte of each pixel as a (height x width) float array
        gamescreen = ale.getScreenRGB()
        # np.float64 replaces the removed alias np.float (same dtype).
        gamescreen = np.asarray(
            gamescreen.view(np.uint8).reshape(
                screen_height, screen_width, 4)[:, :, 0],
            dtype=np.float64)

        # get ram
        ram = ale.getRAM()

        # take the action and get the reward
        reward = ale.act(a)
        total_reward += reward

        frameCount += 1

    print("Episode " + str(episode) + " ended with score: " +
          str(total_reward))
def main():
    """Play 100 Space Invaders episodes with a CNN and summarise the rewards.

    Network parameters are read from ``<cwd>/datasets/spccnn.pkl`` and
    installed into ``cnn.l_out``.  Every decision step gathers ``skipFrame``
    cropped, thresholded and down-scaled screens while repeating the
    handler's last action, then feeds the stack to the network.  The
    per-episode rewards are printed, summarised and plotted at the end.
    """
    import os
    import pickle

    import matplotlib.pyplot as plt
    from learningALE.learners.nns import CNN
    # NOTE(review): scipy.misc.imresize appears to be missing from recent
    # SciPy releases -- confirm the pinned SciPy version still provides it.
    from scipy.misc import imresize

    from learningALE.handlers.actionhandler import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface
    import lasagne
    import numpy as np

    # plt.ion()
    skipFrame = 3
    cnn = CNN((None, skipFrame, 86, 80), 6, .1, stride=(4, 2))
    # Build the path from components instead of a '\d...' literal, which
    # only worked because unknown escape sequences are currently tolerated.
    weights_path = os.path.join(os.getcwd(), 'datasets', 'spccnn.pkl')
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(weights_path, 'rb') as infile:
        parms = pickle.load(infile)
    lasagne.layers.set_all_param_values(cnn.l_out, parms)

    # rom = b'D:\\_code\\breakout.bin'
    rom = b'D:\\_code\\space_invaders.bin'

    ale = ALEInterface(True)
    ale.loadROM(rom)
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    # get labels (only consumed by the optional debug plot further down)
    labels = [
        'noop', 'fire', 'up', 'right', 'left', 'down', 'upright', 'upleft',
        'downright', 'downleft', 'upfire', 'rightfire', 'leftfire',
        'downfire', 'uprightfire', 'upleftfire', 'downrightfire',
        'downleftfire',
    ]
    labels = np.asarray(labels)[legal_actions]

    # set up vars
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (.1, .1, 2), legal_actions)
    rewList = []
    for ep in range(100):
        total_reward = 0.0
        ale.reset_game()
        while not ale.game_over():
            # get frames
            frames = []
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # View the buffer as bytes, four per pixel; keep the first
                # byte of each pixel and drop 25 rows at the top and 12 at
                # the bottom.
                processedImg = np.asarray(
                    gamescreen.view(np.uint8).reshape(
                        screen_height, screen_width, 4)[25:-12, :, 0],
                    dtype=np.float32)
                # Push every value above 1 to 255 before down-scaling.
                processedImg[processedImg > 1] = 255
                processedImg = imresize(processedImg, 0.5, interp='nearest') / 255
                frames.append(processedImg)

                performedAction, _actionInd = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                # Positive rewards are clipped to 1; others pass through.
                if rew > 0:
                    rew = 1
                reward += rew
            total_reward += reward
            frames = np.asarray(frames, dtype=np.float32)

            actionVect = cnn.get_output(frames.reshape((1, skipFrame, frames.shape[1], 80)))[0]
            actionHandler.setAction(actionVect)
            # Optional debug visualisation (disabled):
            # hid1_act = cnn.get_hid1_act(frames.reshape((1, skipFrame, frames.shape[1], 80)))
            # hid2_act = cnn.get_hid2_act(frames.reshape((1, skipFrame, frames.shape[1], 80)))
            # for x in range(hid1_act.shape[1]):
            #     plt.subplot(4,4,x+1)
            #     plt.imshow(hid1_act[0,x], cmap=plt.cm.gray)
            # for x in range(hid2_act.shape[1]):
            #     plt.subplot(6,6,x+1)
            #     plt.imshow(hid2_act[0,x], cmap=plt.cm.gray)
            # plt.show()
            # plt.clf()
            # plt.plot(actionVect)
            # plt.xticks(range(len(labels)), labels)
            # plt.pause(0.001)
        rewList.append(total_reward)
        print(ep, total_reward)

    print(np.mean(rewList), np.std(rewList), np.max(rewList), np.min(rewList))
    print(np.unique(rewList, return_counts=True))
    plt.plot(rewList)
    plt.show()
Ejemplo n.º 4
0
def main():
    """Run the AlloEggoCnn agent on Breakout for 100 episodes.

    Weights come from ``<cwd>/saves/cnnbestalloego.pkl`` and are loaded into
    ``cnn.a_out``.  For each decision the last chosen action is repeated for
    ``skipFrame`` emulator steps while the half-resolution screens are
    collected; the stacked screens are then passed to the network and its
    output is given to the ``ActionHandler``.  Score and frames-per-second
    are printed after each episode and all scores are plotted at the end.
    """
    import os
    import pickle
    import time

    import lasagne
    import matplotlib.pyplot as plt
    import numpy as np
    from learners.nns import AlloEggoCnn
    # NOTE(review): scipy.misc.imresize appears to be missing from recent
    # SciPy releases -- confirm the pinned SciPy version still provides it.
    from scipy.misc import imresize

    from learningALE.handlers import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface

    dtype = np.float16
    plt.ion()

    # set up emulator
    ale = ALEInterface(True)
    ale.loadROM(b'D:\\_code\\breakout.bin')
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    start_lives = 5
    lives = start_lives

    # set up vars
    skipFrame = 4

    # NOTE(review): the tuple presumably configures the e-greedy schedule
    # (see ActionHandler) -- confirm the meaning of each entry.
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (0.1, 0.1, 2), legal_actions)
    scoreList = []

    cnn = AlloEggoCnn()
    # os.path.join avoids the '\s' / '\c' sequences of the old literal,
    # which are invalid escapes that Python merely tolerates for now.
    weights_path = os.path.join(os.getcwd(), 'saves', 'cnnbestalloego.pkl')
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(weights_path, 'rb') as fin:
        parms = pickle.load(fin)

    lasagne.layers.set_all_param_values(cnn.a_out, parms)

    frameCount = 0
    st = time.time()
    for episode in range(100):
        total_reward = 0.0
        while not ale.game_over():
            # get frames
            frames = []
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # Bytes view with four bytes per pixel; only the first byte
                # of each pixel is used.
                channel = gamescreen.view(np.uint8).reshape(
                    screen_height, screen_width, 4)[:, :, 0]
                # Half resolution, values scaled into [0, 1].
                processedImg = np.asarray(
                    imresize(channel, 0.5, interp='nearest'),
                    dtype=dtype) / 255

                frames.append(processedImg)

                performedAction, _actionInd = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                # Any positive emulator reward counts as exactly +1.
                if rew > 0:
                    reward += 1

                ram = ale.getRAM()
                # A change in RAM byte 57 is penalised with -1
                # (presumably the lives counter -- TODO confirm).
                if ram[57] != lives:
                    reward -= 1
                    lives = ram[57]

            frames = np.asarray(frames)

            actionVect = cnn.get_output(frames.reshape((1, skipFrame, 105, 80)))[0]
            actionHandler.setAction(actionVect)

            total_reward += reward
            frameCount += skipFrame

        ale.reset_game()
        actionHandler.anneal()
        scoreList.append(total_reward)

        lives = start_lives

        print("Episode " + str(episode) + " ended with score: " + str(total_reward))

        et = time.time()
        print('Total Time:', et-st, 'Frame Count:', frameCount, 'FPS:',frameCount/(et-st))

    plt.clf()
    plt.plot(scoreList, '.')
    plt.pause(0.01)
    plt.ioff()
Ejemplo n.º 5
0
# Random-agent demo: plays one episode choosing a uniformly random legal
# action on every emulator step, then resets the game.
# NOTE(review): relies on `ale`, `np` and `time` being set up before this
# snippet -- they are not defined in the visible part of the file.
# Backslashes are escaped explicitly; the bytes are the same as before, but
# the literal no longer depends on the invalid escape sequence '\_'.
ale.loadROM(b'd:\\_code\\_reinforcementlearning\\breakout.bin')

# screen dimensions and legal actions
(screen_width, screen_height) = ale.getScreenDims()
legal_actions = ale.getLegalActionSet()

frameCount = 0
st = time.time()
for episode in range(1):
    total_reward = 0.0
    while not ale.game_over():
        # get a random action
        a = legal_actions[np.random.randint(legal_actions.size)]

        # get gamescreen: view the buffer as bytes, four per pixel, and keep
        # the first byte of each pixel as a (height x width) float array
        gamescreen = ale.getScreenRGB()
        # np.float64 replaces the removed alias np.float (same dtype).
        gamescreen = np.asarray(
            gamescreen.view(np.uint8).reshape(
                screen_height, screen_width, 4)[:, :, 0],
            dtype=np.float64)

        # get ram
        ram = ale.getRAM()

        # take the action and get the reward
        reward = ale.act(a)
        total_reward += reward

        frameCount += 1

    print("Episode " + str(episode) + " ended with score: " + str(total_reward))
    # game over man game over, reset
    ale.reset_game()
Ejemplo n.º 6
0
def main():
    """Run the CNN agent on Space Invaders for 100 episodes and report rewards.

    Weights come from ``<cwd>/datasets/spccnn.pkl`` and are loaded into
    ``cnn.l_out``.  For each decision the last chosen action is repeated for
    ``skipFrame`` emulator steps while cropped, thresholded, half-resolution
    screens are collected; the stack is passed to the network and its output
    is given to the ``ActionHandler``.  Episode rewards are printed as they
    finish, then summarised and plotted.
    """
    import os
    import pickle

    import matplotlib.pyplot as plt
    from learningALE.learners.nns import CNN
    # NOTE(review): scipy.misc.imresize appears to be missing from recent
    # SciPy releases -- confirm the pinned SciPy version still provides it.
    from scipy.misc import imresize

    from learningALE.handlers.actionhandler import ActionHandler, ActionPolicy
    from learningALE.libs.ale_python_interface import ALEInterface
    import lasagne
    import numpy as np

    # plt.ion()
    skipFrame = 3
    cnn = CNN((None, skipFrame, 86, 80), 6, .1, stride=(4, 2))
    # os.path.join avoids the '\d' sequence of the old literal, which is an
    # invalid escape that Python merely tolerates for now.
    weights_path = os.path.join(os.getcwd(), 'datasets', 'spccnn.pkl')
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(weights_path, 'rb') as infile:
        parms = pickle.load(infile)
    lasagne.layers.set_all_param_values(cnn.l_out, parms)

    # rom = b'D:\\_code\\breakout.bin'
    rom = b'D:\\_code\\space_invaders.bin'

    ale = ALEInterface(True)
    ale.loadROM(rom)
    (screen_width, screen_height) = ale.getScreenDims()
    legal_actions = ale.getMinimalActionSet()
    # get labels (only consumed by the optional debug plot further down)
    labels = [
        'noop', 'fire', 'up', 'right', 'left', 'down', 'upright', 'upleft',
        'downright', 'downleft', 'upfire', 'rightfire', 'leftfire', 'downfire',
        'uprightfire', 'upleftfire', 'downrightfire', 'downleftfire'
    ]
    labels = np.asarray(labels)[legal_actions]

    # set up vars
    actionHandler = ActionHandler(ActionPolicy.eGreedy, (.1, .1, 2),
                                  legal_actions)
    rewList = []
    for ep in range(100):
        total_reward = 0.0
        ale.reset_game()
        while not ale.game_over():
            # get frames
            frames = []
            reward = 0
            for _ in range(skipFrame):
                gamescreen = ale.getScreenRGB()
                # Bytes view with four bytes per pixel; keep the first byte
                # of each pixel and drop 25 rows at the top and 12 at the
                # bottom.
                processedImg = np.asarray(gamescreen.view(np.uint8).reshape(
                    screen_height, screen_width, 4)[25:-12, :, 0],
                                          dtype=np.float32)
                # Push every value above 1 to 255 before down-scaling.
                processedImg[processedImg > 1] = 255
                processedImg = imresize(processedImg, 0.5,
                                        interp='nearest') / 255
                frames.append(processedImg)

                performedAction, _actionInd = actionHandler.getLastAction()
                rew = ale.act(performedAction)
                # Positive rewards are clipped to 1; others pass through.
                if rew > 0:
                    rew = 1
                reward += rew
            total_reward += reward
            frames = np.asarray(frames, dtype=np.float32)

            actionVect = cnn.get_output(
                frames.reshape((1, skipFrame, frames.shape[1], 80)))[0]
            actionHandler.setAction(actionVect)
            # Optional debug visualisation (disabled):
            # hid1_act = cnn.get_hid1_act(frames.reshape((1, skipFrame, frames.shape[1], 80)))
            # hid2_act = cnn.get_hid2_act(frames.reshape((1, skipFrame, frames.shape[1], 80)))
            # for x in range(hid1_act.shape[1]):
            #     plt.subplot(4,4,x+1)
            #     plt.imshow(hid1_act[0,x], cmap=plt.cm.gray)
            # for x in range(hid2_act.shape[1]):
            #     plt.subplot(6,6,x+1)
            #     plt.imshow(hid2_act[0,x], cmap=plt.cm.gray)
            # plt.show()
            # plt.clf()
            # plt.plot(actionVect)
            # plt.xticks(range(len(labels)), labels)
            # plt.pause(0.001)
        rewList.append(total_reward)
        print(ep, total_reward)

    print(np.mean(rewList), np.std(rewList), np.max(rewList), np.min(rewList))
    print(np.unique(rewList, return_counts=True))
    plt.plot(rewList)
    plt.show()