Example #1
def playFlappyBird():
    episodeMemory = []
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)

    # Step 3.2: run the game
    while True:
        action = np.array([1, 0])
        for event in pygame.event.get():
            if event.type == KEYDOWN and event.key == K_UP:
                action = np.array([0, 1])
            if event.type == QUIT or (event.type == KEYDOWN
                                      and event.key == K_ESCAPE):
                pygame.quit()
                sys.exit()
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        if terminal:
            episodeMemory.append([nextObservation, action, reward, terminal])
            save2file(episodeMemory)
            episodeMemory = []
        else:
            episodeMemory.append([nextObservation, action, reward, terminal])
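Most examples on this page call a preprocess() helper (and, here, a save2file() helper) that the listing does not show. A minimal sketch of preprocess(), assuming it mirrors the grayscale-and-threshold pipeline applied to observation0 above; save2file is presumably a simple serialization of the episode list to disk:

import cv2
import numpy as np

def preprocess(observation):
    # resize to 80x80, grayscale, and binarize, matching the observation0 pipeline
    observation = cv2.cvtColor(cv2.resize(observation, (80, 80)),
                               cv2.COLOR_BGR2GRAY)
    _, observation = cv2.threshold(observation, 1, 255, cv2.THRESH_BINARY)
    # add a channel axis so the frame can be appended to the 4-frame state stack
    return np.reshape(observation, (80, 80, 1))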
Example #2
def run_game():
    game = Game()
    RL = BrainDQN(actions=8)
    step = 0
    for episode in range(500):
        # initial observation
        observation = game.reset_map()
        # print(observation.shape)
        RL.setInitState(observation)
        while True:
            # RL choose action based on observation
            action = RL.getAction()
            # for i in range(6):
            #     print(game.states[:,:,i])
            # print("action:", action)
            # RL take action and get next observation and reward
            observation_, reward, done = game.step(action)
            # for i in range(6):
            #     print(game.states[:,:,i])
            # print("reward:",reward)
            # print("done:", done)
            RL.setPerception(observation_, action, reward, done)

            # break while loop when end of this episode
            if done:
                # print("done,reset")
                observation = game.reset_map()
                RL.setInitState(observation)
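All of these examples drive the agent through the same four calls: BrainDQN(actions), setInitState(), getAction(), and setPerception(). A minimal interface sketch inferred from those call sites; the bodies below are placeholders, not the real implementation (the actual class also holds the replay memory and the Q-network):

import numpy as np

class BrainDQN:
    def __init__(self, actions):
        self.actions = actions      # size of the action space
        self.currentState = None    # stack of the last four 80x80 frames

    def setInitState(self, observation):
        # seed the state with four copies of the first frame -> (80, 80, 4)
        self.currentState = np.stack([observation] * 4, axis=2)

    def getAction(self):
        # placeholder for the epsilon-greedy choice over Q-values
        action = np.zeros(self.actions)
        action[np.random.randint(self.actions)] = 1
        return action

    def setPerception(self, nextObservation, action, reward, terminal):
        # shift the new (80, 80, 1) frame into the state stack; the real class
        # also stores the transition in replay memory and trains the network
        self.currentState = np.append(self.currentState[:, :, 1:],
                                      nextObservation, axis=2)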
Example #3
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 5
    brain = BrainDQN(actions, imgDim)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([0,1,0,0,0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (imgDim[0],imgDim[1])), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0,1,255,cv2.THRESH_BINARY)
    brain.setInitState(observation0)   # stack four copies of observation0 into BrainDQN's self.currentState

    filename = "./expertData/observation"
    '''
    For the first recording run only; keep commented out afterwards.
    '''
#    actInd = 0
#    np.save(filename + str(actInd), observation0)
    
    actInd = 1045  # the last index recorded in the previous session
    # Step 3.2: run the game
    while True:
        # action = brain.getAction()
        act = 0
        while act not in [2, 5, 8, 6, 4]:
            act = input("Please input your action:")
            if act == '':
                continue
            act = int(act)

        # numpad keys map to one-hot actions
        if act == 2: action = np.array([1, 0, 0, 0, 0])
        if act == 5: action = np.array([0, 1, 0, 0, 0])
        if act == 8: action = np.array([0, 0, 1, 0, 0])
        if act == 6: action = np.array([0, 0, 0, 1, 0])
        if act == 4: action = np.array([0, 0, 0, 0, 1])
       
#        ExpertAct.append(action.tolist())
        nextObservation,reward,terminal = flappyBird.frame_step(action)
        actInd += 1
        np.save(filename + str(actInd), nextObservation)
        np.save(filename + "action" + str(actInd), action)
        np.save(filename + "reward" + str(actInd), reward)
        np.save(filename + "terminal" + str(actInd), terminal)
        
        nextObservation = preprocess(nextObservation)
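The recording above writes each transition to ./expertData as four .npy files per step. A hypothetical loader that replays them into the agent, mirroring the commented-out block in the HighSpeedRacing example further down (the default index range is an assumption):

def replayExpertData(brain, filename="./expertData/observation", lastInd=1045):
    # hypothetical helper: feed recorded expert transitions back into the DQN
    for actInd in range(1, lastInd + 1):
        nextObservation = np.load(filename + str(actInd) + ".npy")
        action = np.load(filename + "action" + str(actInd) + ".npy")
        reward = np.load(filename + "reward" + str(actInd) + ".npy")
        terminal = np.load(filename + "terminal" + str(actInd) + ".npy")
        brain.setPerception(preprocess(nextObservation), action, reward, terminal)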
Example #4
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)

    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)

    brain.setInitState(observation0)

    # Step 3.2: run the game
    while True:
        action = brain.getAction()  # the agent returns an action a_t for the current state
        nextObservation, reward, terminal = flappyBird.frame_step(
            action)  # execute the action, getting its reward r(t) and the resulting observation o(t+1)
        nextObservation = preprocess(nextObservation)

        tmp_img = showThreshImg(nextObservation)  # frames from flappyBird come back rotated and mirrored
        cv2.imshow("process", tmp_img)
        cv2.waitKey(1)

        brain.setPerception(nextObservation, action, reward, terminal)
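showThreshImg() is not shown in the listing. Given the comment that frames arrive rotated and mirrored, a plausible sketch that undoes pygame's (width, height) axis order for display; this is purely an assumption:

def showThreshImg(observation):
    # hypothetical: transpose and mirror so the frame displays upright
    img = observation.reshape(80, 80)
    return cv2.flip(cv2.transpose(img), 1)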
Example #5
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    print(observation0.shape)
    brain.setInitState(observation0)

    # Step 3.2: run the game
    while True:
        action = brain.getAction()
        print(action)
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
        cv2.imshow('Video', nextObservation)  # display the preprocessed frame
        if cv2.waitKey(1) & 0xFF == 27:
            break
Example #6
def playSnake():
    # Step 1: init BrainDQN
    actions = 4
    top = 0
    brain = BrainDQN(actions)
    # Step 2: init Plane Game
    GluttonousSnake = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([0, 0, 0, 1])  # one-hot action over the four snake directions
    observation0, reward0, terminal, score = GluttonousSnake.frame_step(
        action0)

    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    # cv2.imwrite("3.jpg",observation0,[int(cv2.IMWRITE_JPEG_QUALITY),5])

    brain.setInitState(observation0)

    # Step 3.2: run the game
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal, score = GluttonousSnake.frame_step(
            action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
        if score > top:
            top = score

        print('top:%u' % top)
Example #7
def playHapplyDemo():
	# Step 1: init BrainDQN
	actions = 4
	brain = BrainDQN(actions)
	# Step 2: init Flappy Bird Game
	happyDemo = game.GameState()

	action0 = np.array([1,0,0,0])  # do nothing
	#action0 = np.array([1, 0])  # do nothing
	observation0, reward0, terminal = happyDemo.frame_step(action0)
	observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
	ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
	brain.setInitState(observation0)

	t = threading.Thread(target=playStep,args=(happyDemo,brain,))
	t.start()
	happyDemo.show()
	stop_thread(t)
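playStep() and stop_thread() are not shown in the listing. Assuming playStep runs the usual act/observe/learn loop from the other examples on the background thread, a sketch:

def playStep(happyDemo, brain):
    # hypothetical thread target: the standard DQN interaction loop
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal = happyDemo.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)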
Example #8
def PlaySurvalNew():
    logging.basicConfig(filename='logs/myplayer.log', level=logging.INFO)
    logging.info('Started')
    # nine movement directions
    action_space = ['u', 'd', 'l', 'r', 'a', 'x', 's', 'w', 'e']
    n_actions = len(action_space)
    train = BrainDQN(n_actions)
    # initialize the random-map generator
    genermap = map()
    TrainGame = Game()
    mapnum = genermap.init_battle_map()
    for i in range(mapnum):
        # compute how many loops to run for each map
        looptimes = genermap.calclooptimes(i)
        logging.info('loop %d,i %d', looptimes, i)
        for j in range(looptimes):
            flag = False
            index = j
            if j >= looptimes / 2:
                flag = True
                index = j - looptimes
            # initialize the random map data
            reservelist = genermap.fillplayerpositon(index, i, flag)
            # generate poison gas
            posionlist = genermap.GeneratorPosion(i)
            # generate items
            genermap.GeneratorTool(reservelist, posionlist, i)
            # initialize the Game environment
            TrainGame.binary_env_reset(genermap.cacheMap, genermap.PosionMap)
            train.setInitState(TrainGame.binary_env)
            # train on this map for 5000 steps, then switch maps
            for episode in range(5000):
                # randomly pick a direction
                action = train.getAction()
                # compute the reward for that direction
                nextObservation, reward, terminal = TrainGame.binary_step(
                    action)
                # add the transition to the training set
                train.setPerception(nextObservation, action, reward, terminal)
                if terminal:
                    TrainGame.binary_env_reset(genermap.cacheMap,
                                               genermap.PosionMap)
                    train.setInitState(TrainGame.binary_env)
Example #9
def HighSpeedRacing():
    # Step 1: init BrainDQN
    actions = 5
    brain = BrainDQN(actions, imgDim)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([0, 1, 0, 0, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    print(observation0)
    #    print('observation0 1:',observation0)
    #    observation0 = cv2.cvtColor(cv2.resize(observation0, (imgDim[0],imgDim[1])), cv2.COLOR_BGR2GRAY)
    #    ret, observation0 = cv2.threshold(observation0,1,255,cv2.THRESH_BINARY)
    brain.setInitState(
        observation0,
        action0)  # stack four copies of observation0 into BrainDQN's self.currentState

    #    isUseExpertData = False
    ##    isUseExpertData = True
    #    if(isUseExpertData == True):
    #        filename = "./expertData/observation"
    #        actInd = 0
    #        observation0 = np.load(filename + str(actInd) + ".npy")
    #        plt.imshow(observation0)
    #    #    # Step 3.2: run the game
    #    #    while 1!= 0:
    #        for _ in range(1):
    #            actInd = 0
    #            for actInd in range(1,2073):
    #                actInd += 1
    #                action = np.load(filename + "action" + str(actInd) + ".npy")
    #                reward = np.load(filename + "reward" + str(actInd) + ".npy")
    #                terminal = np.load(filename + "terminal" + str(actInd) + ".npy")
    #                nextObservation = np.load(filename + str(actInd) + ".npy")
    #                plt.imshow(nextObservation)
    #                nextObservation = preprocess(nextObservation)
    #                brain.setPerception(nextObservation,action,reward,terminal)
    loss = []
    plt.figure()
    ind = 0
    # Step 3.2: run the game
    while True:
        #        time.sleep(0.1)
        action = brain.getAction()
        loss.append(brain.loss_temp)
        ind += 1
        if ind % 500 == 499:
            plt.plot(loss)
            plt.show()
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        #        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
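Note that plt.show() in matplotlib's default non-interactive mode blocks until the plot window is closed, so this loss plot pauses training every 500 steps; plt.ion() together with plt.pause() avoids the stall.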
Example #10
def playSurvival():
    logging.basicConfig(filename='logs/myplayer.log', level=logging.INFO)
    logging.info('Started')
    # nine movement directions
    action_space = ['u', 'd', 'l', 'r', 'a', 'x', 's', 'w', 'e']
    n_actions = len(action_space)
    train = BrainDQN(n_actions)
    # initialize the random-map generator
    genermap = map()
    TrainGame = Game()
    for i in range(6000000):
        loop = i // 1000000
        print("loop:", loop)
        logging.info('loop %d,i %d', loop, i)
        # initialize the random map data
        genermap.init_battle_map()
        reservelist = genermap.fillramdomplayer(loop)
        print("Posion start")
        # generate poison gas
        posionlist = genermap.GeneratorPosion(i)
        print("Posion end")
        # generate items
        genermap.GeneratorTool(reservelist, posionlist, i)
        # initialize the Game environment
        TrainGame.binary_env_reset(genermap.cacheMap, genermap.PosionMap)
        train.setInitState(TrainGame.binary_env)
        # train on this map for up to 2000 steps, then switch maps
        for episode in range(2000):
            # randomly pick a direction
            action = train.getAction()
            # compute the reward for that direction
            nextObservation, reward, terminal = TrainGame.binary_step(action)
            # add the transition to the training set
            train.setPerception(nextObservation, action, reward, terminal)
            if terminal:
                TrainGame.binary_env_reset(genermap.cacheMap,
                                           genermap.PosionMap)
                train.setInitState(TrainGame.binary_env)
                break
Example #11
def playGame():
    game = GameObject()
    game.train = TRAIN
    actions = 3
    brain = BrainDQN(actions, game)
    action0 = np.array([1, 0, 0])  # do nothing
    observation0, _ = game.frame_step(action0)
    observation0 = cv2.resize(observation0, (IMG_HEIGHT, IMG_WIDTH))
    observation0 = cv2.cvtColor(observation0, cv2.COLOR_BGR2GRAY)

    brain.setInitState(observation0)

    while True:
        action = brain.getAction()
        nextObservation, reward = game.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward)
Example #12
def playFlappyBird():
    actions = 2
    brain = BrainDQN(actions)
    flappyBird = game.GameState()
    action0 = np.array([1, 0])
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)

    brain.setInitState(observation0)

    while True:
        action = brain.getAction()
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
Example #13
def playGame():
    # Step 0: Define report
    win = 0
    lose = 0
    points = 0
    # Step 1: init BrainDQN
    actions = 5
    brain = BrainDQN(actions)
    # Step 2: init Game
    bg = Game()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = bg.gen_action([1, 0, 0, 0, 0])  # do nothing
    observation0, reward0, terminal = bg.step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)

    # Step 3.2: run the game
    while True:
        pygame.event.get()  # pump the event queue so the game window keeps updating
        action = bg.gen_action(brain.getAction())
        Observation, reward, terminal = bg.step(action)
        nextObservation = preprocess(Observation)
        brain.setPerception(nextObservation, action, reward, terminal)

        ########################  stats for the output report  ########################
        points += reward
        if terminal:
            win += points
            lose += 1
            points = 0
            bg = Game()
        print("Lost cnt:", lose, " ,win_points:", round(points, 2), " ,cnt:",
              brain.timeStep)
        if brain.timeStep % 1000 == 0:
            learn_rate.append(lose)
            win_cnt.append(win)
            plt.plot(learn_rate, "g")
            plt.plot(win_cnt, "r")
            plt.show()
            lose = 0
            win = 0
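Note: learn_rate and win_cnt are assumed to be module-level lists defined elsewhere in this script.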
Example #14
File: PlaneDQN.py  Project: okjer/gameAI
def playPlane():
    # Step 1: init BrainDQN
    actions = 3
    brain = BrainDQN(actions)
    # Step 2: init Plane Game
    plane = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0, 0])  # [1,0,0]do nothing,[0,1,0]left,[0,0,1]right
    observation0, reward0, terminal = plane.frame_step(action0)

    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)

    # Step 3.2: run the game
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal = plane.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
Example #15
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)

    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()

    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    _, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    # observation0 = preprocess(observation0)  # why is the init image [80,80] rather than [80,80,1]? => because setInitState() takes four stacked frames
    brain.setInitState(observation0)

    # Step 3.2: run the game
    while True:
        action = brain.getAction() # choose the next action based on the current observation
        nextObservation, reward, terminal = flappyBird.frame_step(action) # take the action
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal) # learn from the reward this action produced
Example #16
def playFlappyBird():
	# Step 1: init BrainDQN
	actions = 2
	brain = BrainDQN(actions)
	# Step 2: init Flappy Bird Game
	flappyBird = game.GameState()
	# Step 3: play game
	# Step 3.1: obtain init state
	action0 = np.array([1,0])  # do nothing
	observation0, reward0, terminal = flappyBird.frame_step(action0)
	observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
	ret, observation0 = cv2.threshold(observation0,1,255,cv2.THRESH_BINARY)
	brain.setInitState(observation0)

	# Step 3.2: run the game
	while True:
		action = brain.getAction()
		nextObservation,reward,terminal = flappyBird.frame_step(action)
		nextObservation = preprocess(nextObservation)
		brain.setPerception(nextObservation,action,reward,terminal)
Example #17
def playFlappyBird():
	# Step 1: init BrainDQN
	actions = 2
	brain = BrainDQN(actions)
	# Step 2: init Flappy Bird Game
	flappyBird = game.GameState()
	flappyBird.reset_max_score()
	# Step 3: play game
	# Step 3.1: obtain init state
	action0 = np.array([1,0])  # do nothing
	observation0, reward0, terminal = flappyBird.frame_step(action0)
	observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
	ret, observation0 = cv2.threshold(observation0,1,255,cv2.THRESH_BINARY)
	brain.setInitState(observation0)

	# Step 3.2: run the game
	while True:
		action = brain.getAction()
		nextObservation,reward,terminal = flappyBird.frame_step(action)
		nextObservation = preprocess(nextObservation)
		brain.setPerception(nextObservation,action,reward,terminal)
		# "/ STATE", brain.state
		print("TS {}/AVG {:.2f}/EPS {:.4f}/MAX {}/TURN {}".format(brain.timeStep,
		    flappyBird.avg_score, brain.epsilon, flappyBird.max_score, flappyBird.turns))
Example #18
def playFlappyBird():
	# Step 1: init BrainDQN
	actions = 2
	brain = BrainDQN(actions)
	# Step 2: init Flappy Bird Game
	flappyBird = game.GameState()
	# Step 3: play game
	# Step 3.1: obtain init state
	action0 = np.array([1,0])  # do nothing
	observation0, reward0, terminal = flappyBird.frame_step(action0)
	observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
	ret, observation0 = cv2.threshold(observation0,1,255,cv2.THRESH_BINARY)
	brain.setInitState(observation0.astype(np.float32))

	# Step 3.2: run the game
	writer = tf.summary.create_file_writer("tf_logs")
	with writer.as_default():
		while True:
			action = brain.getAction()
			nextObservation,reward,terminal = flappyBird.frame_step(action)
			nextObservation = preprocess(nextObservation)
			step, loss = brain.setPerception(nextObservation,action,reward,terminal)
			tf.summary.scalar("loss", loss, step=step)
			writer.flush()
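The logged loss curve can then be inspected by pointing TensorBoard at the log directory: tensorboard --logdir tf_logs.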
Example #19
        #    self.action_rel[2]=16
        #return self.action_rel


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--ip', help='server ip')
    parser.add_argument('--port', help='server port', default="11111")
    args = parser.parse_args()

    env = sc.SingleBattleEnv(args.ip, args.port)
    env.seed(123)
    agent = RandomAgent(env.action_space)

    episodes = 0
    brain = BrainDQN(7)
    p_state = {}
    obs, p_state = env.reset()
    action = agent.act()
    brain.setInitState(obs)
    # because of reset latency, the first few frames contain no units, so handle that case here
    #try:
    while ((len(p_state['units_myself']) == 0)
           or (len(p_state['units_enemy']) == 0)):
        #print "p_state",p_state
        obs, p_state = env.reset()
        action = agent.act()
        brain.setInitState(obs)
    #finally:
    #    print "the frame of StarCraft is going on"
Example #20
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 14 10:14:22 2018

@author: qxb-810
"""
from __future__ import print_function
import gym
import cv2
import numpy as np
from moviepy.editor import VideoClip
from BrainDQN_Nature import BrainDQN
from brick import ImageProcess, ENV_NAME

env = gym.make(ENV_NAME)
agent = BrainDQN(env.action_space.n, e_greedy=False, print_log=False)

n_episode = 10
best = 0
done = True
rewards = []
frames = []
buffer = []
fps = 25


def make_frame(t):
    global done, env, agent
    if done:
        state = env.reset()
        agent.setInitState(ImageProcess.reshapeHalf(state))
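The listing cuts off inside make_frame(), which is meant to return one RGB frame per timestamp t for moviepy. A hypothetical rendering call for such a frame function; the duration and output filename here are illustrative only:

clip = VideoClip(make_frame, duration=60)  # hypothetical 60-second clip
clip.write_videofile("play.mp4", fps=fps)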
Example #21
def main():
    env = gym.make(ENV_NAME)

    agent = BrainDQN(env.action_space.n)

    try:
        for episode in range(EPISODE):
            state = env.reset()
            state = ImageProcess.reshapeHalf(state)

            agent.setInitState(state)
            life = 5
            for step in range(MAX_STEP):
                # env.render()
                action = agent.getAction()
                next_state, reward, done, info = env.step(action)
                next_state = np.reshape(ImageProcess.reshapeHalf(next_state),
                                        (80, 80, 1))
                if info['ale.lives'] < life:
                    #reward-=(life-info['ale.lives'])
                    life = info['ale.lives']
                agent.setPerception(next_state, action, reward, done, episode)
                if done:
                    break
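            # every 500 episodes (at episode 100, 600, 1100, ...), run 10
            # evaluation episodes with epsilon-greedy exploration disabled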
            if episode % 500 == 100:
                agent.e_greedy = False
                for e in range(10):
                    state = env.reset()
                    state = ImageProcess.reshapeHalf(state)

                    agent.setInitState(state)
                    life = 5
                    for step in range(MAX_STEP):
                        # env.render()
                        action = agent.getAction()
                        next_state, reward, done, info = env.step(action)
                        next_state = np.reshape(
                            ImageProcess.reshapeHalf(next_state), (80, 80, 1))
                        if info['ale.lives'] < life:
                            #reward-=(life-info['ale.lives'])
                            life = info['ale.lives']
                        agent.setPerception(next_state, action, reward, done,
                                            episode)
                        if done:
                            break
                agent.e_greedy = True

    finally:
        pass
Example #22
def TestTrainer():
    init()
    screen = display.set_mode((WIDTH, HEIGHT))
    surf = Surface((WIDTH, HEIGHT))
    trainerBinding = Trainer(surf)

    # start game, create DQN
    #trainerBinding.initializeGame("ASDFASDF",startAt=Trainer.START_AT_MONSTER_ROOM)
    start(trainerBinding)
    clock = time.Clock()
    actions = 8
    brain = BrainDQN(actions)
    actionList = [
        Trainer.MOVE_LEFT, Trainer.MOVE_RIGHT, Trainer.MOVE_UP,
        Trainer.MOVE_DOWN, Trainer.SHOOT_LEFT, Trainer.SHOOT_RIGHT,
        Trainer.SHOOT_UP, Trainer.SHOOT_DOWN
    ]

    # initialize first state
    action0 = np.array([1, 0, 0, 0, 0, 0, 0, 0])
    trainerBinding.sendPushKeyEvent(actionList[np.argmax(action0)])
    frameData0 = trainerBinding.advanceFrame()
    observation0 = surfarray.array3d(frameData0.surface)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)
    lastIsaacHP = frameData0.isaac_hp
    lastNumEnemies = frameData0.num_enemies

    # start training
    running = True
    score = 0
    epochs = 0
    winEpoch = 0

    # Initialize variables used for visualizations
    totalReward = 0
    numQValues = 0
    totalQValue = 0

    while running:
        # print current epoch and score
        print("epoch", epochs, "/ score", score)
        # a harness for computer input, detects when the x
        # on the window has been clicked and exits accordingly
        for evt in event.get():
            if evt.type == QUIT:
                print("first win epoch", winEpoch)
                running = False
        # get action from the DQN
        action, QValue = brain.getAction()
        totalQValue += QValue
        numQValues += 1
        currentAction = actionList[np.argmax(action)]
        trainerBinding.sendPushKeyEvent(currentAction)
        # advance the simulation
        frameData = trainerBinding.advanceFrame()
        # Record position
        pos = np.array(frameData.isaac_position)
        positions.append(np.round(pos, decimals=2))
        # if enter another room, restart
        if frameData.has_room_changed:
            start(trainerBinding)
        # get hp
        currentIsaacHP = frameData.isaac_hp
        currentNumEnemies = frameData.num_enemies
        # if boss is dead, restart game and update score and epochs
        if currentNumEnemies == 0:
            score = score + 1
            epochs = epochs + 1
            if score == 1:
                winEpoch = epochs

            totalRewardsInEpochs.append(totalReward)
            totalReward = 0

            averageQValuesInEpochs.append(totalQValue / numQValues)
            totalQValue = 0
            numQValues = 0

            start(trainerBinding)
            #trainerBinding.initializeGame("ASDFASDF",startAt=Trainer.START_AT_MONSTER_ROOM)
        # get reward
        reward = getReward(currentIsaacHP, lastIsaacHP, currentNumEnemies,
                           lastNumEnemies)
        totalReward += reward
        lastIsaacHP = currentIsaacHP
        lastNumEnemies = currentNumEnemies
        # get terminal state, if terminal, restart game and update epochs
        terminal = terminalState(trainerBinding.getSimulationStatus())
        if terminal:  # or reward == -1:
            epochs = epochs + 1

            totalRewardsInEpochs.append(totalReward)
            totalReward = 0

            averageQValuesInEpochs.append(
                np.round(totalQValue / numQValues, decimals=2))
            totalQValue = 0
            numQValues = 0

            start(trainerBinding)
            #trainerBinding.initializeGame("ASDFASDF",startAt=Trainer.START_AT_MONSTER_ROOM)
        # train
        nextObservation = surfarray.array3d(frameData.surface)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
        # pull out some frame information
        screen.blit(frameData.surface, (0, 0))
        display.flip()
        clock.tick(60)

    quit()
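Note: positions, totalRewardsInEpochs, and averageQValuesInEpochs are assumed to be module-level lists, and start(), getReward(), and terminalState() are helpers defined elsewhere in this file.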