def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    print(observation0.shape)
    brain.setInitState(observation0)
    # Step 3.2: run the game
    while True:
        action = brain.getAction()
        print(action)
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
        cv2.imshow('Video', nextObservation)  # imshow needs an image, not 0
        if cv2.waitKey(1) & 0xFF == 27:  # Esc exits the loop
            break
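# Most snippets in this listing call preprocess() without defining it. A
# minimal sketch, assuming it mirrors the initial-frame pipeline above
# (resize to 80x80, grayscale, binary threshold) and returns the (80, 80, 1)
# shape that setPerception appends to the state stack; the repos' actual
# helpers may differ.
def preprocess(observation):
    # resize the raw RGB frame and convert it to grayscale
    observation = cv2.cvtColor(cv2.resize(observation, (80, 80)), cv2.COLOR_BGR2GRAY)
    # binarize: any non-black pixel becomes white
    _, observation = cv2.threshold(observation, 1, 255, cv2.THRESH_BINARY)
    # add the channel axis expected by the DQN input
    return np.reshape(observation, (80, 80, 1))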
def run_game():
    game = Game()
    RL = BrainDQN(actions=8)
    step = 0
    for episode in range(500):
        # initial observation
        observation = game.reset_map()
        # print(observation.shape)
        RL.setInitState(observation)
        while True:
            # RL choose action based on observation
            action = RL.getAction()
            # for i in range(6):
            #     print(game.states[:, :, i])
            # print("action:", action)
            # RL take action and get next observation and reward
            observation_, reward, done = game.step(action)
            # for i in range(6):
            #     print(game.states[:, :, i])
            # print("reward:", reward)
            # print("done:", done)
            RL.setPerception(observation_, action, reward, done)
            # break while loop when end of this episode
            if done:
                # print("done,reset")
                observation = game.reset_map()
                RL.setInitState(observation)
                break  # without this break, the for-loop over episodes never advances
def playSnake():
    # Step 1: init BrainDQN
    actions = 4
    top = 0
    brain = BrainDQN(actions)
    # Step 2: init Snake Game
    GluttonousSnake = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([0, 0, 0, 1])  # one-hot over the four snake actions
    observation0, reward0, terminal, score = GluttonousSnake.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    # cv2.imwrite("3.jpg", observation0, [int(cv2.IMWRITE_JPEG_QUALITY), 5])
    brain.setInitState(observation0)
    # Step 3.2: run the game
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal, score = GluttonousSnake.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
        if score > top:
            top = score
            print('top:%u' % top)
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)
    # Step 3.2: run the game
    while True:
        action = brain.getAction()  # the agent picks action a(t) for the current state
        # the game executes a(t) and returns its reward r(t) and the resulting observation o(t+1)
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        tmp_img = showThreshImg(nextObservation)  # frames from flappyBird come rotated and mirrored
        cv2.imshow("process", tmp_img)
        cv2.waitKey(1)
        brain.setPerception(nextObservation, action, reward, terminal)
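# showThreshImg() is not defined in this snippet. A minimal sketch, assuming
# it only corrects the rotation/mirroring noted above and drops the channel
# axis so cv2.imshow gets a 2-D image; the actual helper may do more.
def showThreshImg(observation):
    img = np.squeeze(observation).astype(np.uint8)  # (80, 80, 1) -> (80, 80)
    img = cv2.transpose(img)  # pygame surfaces are x-major, so transpose undoes the rotation
    img = cv2.flip(img, 0)    # hypothetical: undo the mirroring for display
    return img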
def HighSpeedRacing():
    # Step 1: init BrainDQN
    actions = 5
    brain = BrainDQN(actions, imgDim)
    # Step 2: init the racing game (variable name kept from the Flappy Bird template)
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([0, 1, 0, 0, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    print(observation0)
    # print('observation0 1:', observation0)
    # observation0 = cv2.cvtColor(cv2.resize(observation0, (imgDim[0], imgDim[1])), cv2.COLOR_BGR2GRAY)
    # ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    # copy observation0 four times into BrainDQN's self.currentState
    brain.setInitState(observation0, action0)

    # isUseExpertData = False
    # # isUseExpertData = True
    # if isUseExpertData == True:
    #     filename = "./expertData/observation"
    #     actInd = 0
    #     observation0 = np.load(filename + str(actInd) + ".npy")
    #     plt.imshow(observation0)
    #     # Step 3.2: run the game
    #     # while 1 != 0:
    #     for _ in range(1):
    #         actInd = 0
    #         for actInd in range(1, 2073):
    #             actInd += 1
    #             action = np.load(filename + "action" + str(actInd) + ".npy")
    #             reward = np.load(filename + "reward" + str(actInd) + ".npy")
    #             terminal = np.load(filename + "terminal" + str(actInd) + ".npy")
    #             nextObservation = np.load(filename + str(actInd) + ".npy")
    #             plt.imshow(nextObservation)
    #             nextObservation = preprocess(nextObservation)
    #             brain.setPerception(nextObservation, action, reward, terminal)

    loss = []
    plt.figure()
    ind = 0
    # Step 3.2: run the game
    while True:
        # time.sleep(0.1)
        action = brain.getAction()
        loss.append(brain.loss_temp)
        ind += 1
        if ind % 500 == 499:
            plt.plot(loss)
            plt.show()
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        # nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
def main():
    env = gym.make(ENV_NAME)
    agent = BrainDQN(env.action_space.n)
    try:
        for episode in range(EPISODE):
            state = env.reset()
            state = ImageProcess.reshapeHalf(state)
            agent.setInitState(state)
            life = 5
            for step in range(MAX_STEP):
                # env.render()
                action = agent.getAction()
                next_state, reward, done, info = env.step(action)
                next_state = np.reshape(ImageProcess.reshapeHalf(next_state), (80, 80, 1))
                if info['ale.lives'] < life:
                    # reward -= (life - info['ale.lives'])
                    life = info['ale.lives']
                agent.setPerception(next_state, action, reward, done, episode)
                if done:
                    break
            if episode % 500 == 100:
                # evaluation: act greedily for 10 episodes, then restore exploration
                agent.e_greedy = False
                for e in range(10):
                    state = env.reset()
                    state = ImageProcess.reshapeHalf(state)
                    agent.setInitState(state)
                    life = 5
                    for step in range(MAX_STEP):
                        # env.render()
                        action = agent.getAction()
                        next_state, reward, done, info = env.step(action)
                        next_state = np.reshape(ImageProcess.reshapeHalf(next_state), (80, 80, 1))
                        if info['ale.lives'] < life:
                            # reward -= (life - info['ale.lives'])
                            life = info['ale.lives']
                        agent.setPerception(next_state, action, reward, done, episode)
                        if done:
                            break
                agent.e_greedy = True
    finally:
        pass
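# ImageProcess.reshapeHalf() is external to this snippet. A minimal sketch,
# assuming (from the name and the (80, 80, 1) reshape above) that it scales an
# Atari RGB frame down to an 80x80 grayscale image; the real module may also
# crop or threshold.
class ImageProcess:
    @staticmethod
    def reshapeHalf(frame):
        # 210x160 Atari frames -> 80x80 grayscale (roughly half the width)
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        return cv2.resize(gray, (80, 80))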
def playFlappyBird():
    actions = 2
    brain = BrainDQN(actions)
    flappyBird = game.GameState()
    action0 = np.array([1, 0])
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
def playGame():
    game = GameObject()
    game.train = TRAIN
    actions = 3
    brain = BrainDQN(actions, game)
    action0 = np.array([1, 0, 0])  # do nothing
    observation0, _ = game.frame_step(action0)
    # note: cv2.resize expects (width, height); this order is swapped unless IMG_HEIGHT == IMG_WIDTH
    observation0 = cv2.resize(observation0, (IMG_HEIGHT, IMG_WIDTH))
    observation0 = cv2.cvtColor(observation0, cv2.COLOR_BGR2GRAY)
    brain.setInitState(observation0)
    while True:
        action = brain.getAction()
        nextObservation, reward = game.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward)
def playGame():
    # Step 0: define report counters
    win = 0
    lose = 0
    points = 0
    # Step 1: init BrainDQN
    actions = 5
    brain = BrainDQN(actions)
    # Step 2: init Game
    bg = Game()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = bg.gen_action([1, 0, 0, 0, 0])  # do nothing
    observation0, reward0, terminal = bg.step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)
    # Step 3.2: run the game
    while True:
        pygame.event.get()  # pump events so the game window keeps updating
        action = bg.gen_action(brain.getAction())
        Observation, reward, terminal = bg.step(action)
        nextObservation = preprocess(Observation)
        brain.setPerception(nextObservation, action, reward, terminal)
        # ---- bookkeeping for the progress report ----
        points += reward
        if terminal == True:
            win += points
            lose += 1
            # print before resetting points, so the finished game's score is reported
            print("Lost cnt:", lose, " ,win_points:", round(points, 2), " ,cnt:", brain.timeStep)
            points = 0
            bg = Game()
        if brain.timeStep % 1000 == 0:
            learn_rate.append(lose)
            win_cnt.append(win)
            plt.plot(learn_rate, "g")
            plt.plot(win_cnt, "r")
            plt.show()
            lose = 0
            win = 0
def PlaySurvalNew():
    logging.basicConfig(filename='logs/myplayer.log', level=logging.INFO)
    logging.info('Started')
    # nine actions (directions/skills)
    action_space = ['u', 'd', 'l', 'r', 'a', 'x', 's', 'w', 'e']
    n_actions = len(action_space)
    train = BrainDQN(n_actions)
    # init the random-map generator
    genermap = map()
    TrainGame = Game()
    mapnum = genermap.init_battle_map()
    for i in range(mapnum):
        # compute how many loops this map gets
        looptimes = genermap.calclooptimes(i)
        logging.info('loop %d,i %d', looptimes, i)
        for j in range(looptimes):
            flag = False
            index = j
            if j >= looptimes / 2:
                flag = True
                index = j - looptimes
            # init the random map layout
            reservelist = genermap.fillplayerpositon(index, i, flag)
            # spawn poison gas
            posionlist = genermap.GeneratorPosion(i)
            # spawn items
            genermap.GeneratorTool(reservelist, posionlist, i)
            # init the Game environment
            TrainGame.binary_env_reset(genermap.cacheMap, genermap.PosionMap)
            train.setInitState(TrainGame.binary_env)
            # train for 5000 steps, then move to the next map layout
            for episode in range(5000):
                # ask the agent for an action
                action = train.getAction()
                # apply it and get the reward
                nextObservation, reward, terminal = TrainGame.binary_step(action)
                # store the transition for training
                train.setPerception(nextObservation, action, reward, terminal)
                if terminal == True:
                    TrainGame.binary_env_reset(genermap.cacheMap, genermap.PosionMap)
                    train.setInitState(TrainGame.binary_env)
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)
    # Step 3.2: run the game
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
def playSurvival():
    logging.basicConfig(filename='logs/myplayer.log', level=logging.INFO)
    logging.info('Started')
    # nine actions (directions/skills)
    action_space = ['u', 'd', 'l', 'r', 'a', 'x', 's', 'w', 'e']
    n_actions = len(action_space)
    train = BrainDQN(n_actions)
    # init the random-map generator
    genermap = map()
    TrainGame = Game()
    for i in range(6000000):
        loop = int(i / 1000000)
        print("loop:", loop)
        logging.info('loop %d,i %d', loop, i)
        # init the random map layout
        genermap.init_battle_map()
        reservelist = genermap.fillramdomplayer(loop)
        print("Posion start")
        # spawn poison gas
        posionlist = genermap.GeneratorPosion(i)
        print("Posion end")
        # spawn items
        genermap.GeneratorTool(reservelist, posionlist, i)
        # init the Game environment
        TrainGame.binary_env_reset(genermap.cacheMap, genermap.PosionMap)
        train.setInitState(TrainGame.binary_env)
        # train up to 2000 steps, then regenerate the map
        for episode in range(2000):
            # ask the agent for an action
            action = train.getAction()
            # apply it and get the reward
            nextObservation, reward, terminal = TrainGame.binary_step(action)
            # store the transition for training
            train.setPerception(nextObservation, action, reward, terminal)
            if terminal == True:
                TrainGame.binary_env_reset(genermap.cacheMap, genermap.PosionMap)
                train.setInitState(TrainGame.binary_env)
                break
def playPlane():
    # Step 1: init BrainDQN
    actions = 3
    brain = BrainDQN(actions)
    # Step 2: init Plane Game
    plane = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0, 0])  # [1,0,0] do nothing, [0,1,0] left, [0,0,1] right
    observation0, reward0, terminal = plane.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)
    # Step 3.2: run the game
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal = plane.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    _, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    # observation0 = preprocess(observation0)
    # why is the init image [80, 80] rather than [80, 80, 1]?
    # => because setInitState() stacks four copies of the frame into the state
    brain.setInitState(observation0)
    # Step 3.2: run the game
    while True:
        action = brain.getAction()  # choose the next action from the current state
        nextObservation, reward, terminal = flappyBird.frame_step(action)  # take the action
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)  # learn from the reward this action produced
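# The comments above and in HighSpeedRacing() describe setInitState() as
# stacking four copies of the first frame into self.currentState. A minimal
# sketch of that behavior, assuming BrainDQN keeps an (80, 80, 4) state
# tensor; BrainDQNSketch is a hypothetical stand-in, the real class may differ.
class BrainDQNSketch:
    def setInitState(self, observation):
        # repeat the (80, 80) frame 4 times along a new channel axis -> (80, 80, 4)
        self.currentState = np.stack((observation,) * 4, axis=2)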
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    flappyBird.reset_max_score()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)
    # Step 3.2: run the game
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
        # "/ STATE", brain.state
        print("TS {}/AVG {:.2f}/EPS {:.4f}/MAX {}/TURN {}".format(
            brain.timeStep, flappyBird.avg_score, brain.epsilon,
            flappyBird.max_score, flappyBird.turns))
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0.astype(np.float32))
    # Step 3.2: run the game
    writer = tf.summary.create_file_writer("tf_logs")
    with writer.as_default():
        while True:
            action = brain.getAction()
            nextObservation, reward, terminal = flappyBird.frame_step(action)
            nextObservation = preprocess(nextObservation)
            step, loss = brain.setPerception(nextObservation, action, reward, terminal)
            tf.summary.scalar("loss", loss, step=step)
            writer.flush()
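# With the writer above, the logged loss curve can be viewed by pointing
# TensorBoard at the same directory:
#   tensorboard --logdir tf_logs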
    if action_pl[0] == 1.0 and action_pl[3] == 0:
        action_nerual = [1.0, 0, 0, 0, 0, 0, 0]
    elif action_pl[0] == 1.0 and action_pl[3] == 1:
        action_nerual = [0, 1.0, 0, 0, 0, 0, 0]
    elif action_pl[0] == 1.0 and action_pl[3] == 2:
        action_nerual = [0, 0, 1.0, 0, 0, 0, 0]
    elif action_pl[0] == -1.0 and action_pl[1] == 0:
        action_nerual = [0, 0, 0, 1.0, 0, 0, 0]
    elif action_pl[0] == -1.0 and action_pl[1] == 90:
        action_nerual = [0, 0, 0, 0, 1.0, 0, 0]
    elif action_pl[0] == -1.0 and action_pl[1] == 180:
        action_nerual = [0, 0, 0, 0, 0, 1.0, 0]
    else:
        action_nerual = [0, 0, 0, 0, 0, 0, 1.0]
    brain.setPerception(obs, action_nerual, reward, done)
    # unit_myself_index += 1
    # print(action, obs, reward, done, info)
    episodes += 1
    # obs = env.reset()
    _, p_state = env.reset()
    # test
    total_reward = 0
    avg_reward = 0
    TEST_EPISODE = 50
    if episodes % 2000 == 0:
        endtime = time.time()
        gtime = endtime - startime
        print(gtime)
        for test_i in range(TEST_EPISODE):
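# The if/elif ladder above maps a (action_pl[0], argument) pair to a one-hot
# vector. A sketch of the same mapping driven by a lookup table: the keys are
# taken directly from the conditions above, and np.eye builds the one-hot
# rows. to_one_hot() is a hypothetical helper, not part of the original code.
ACTION_INDEX = {
    (1.0, 0): 0, (1.0, 1): 1, (1.0, 2): 2,        # keyed on action_pl[3] when action_pl[0] == 1.0
    (-1.0, 0): 3, (-1.0, 90): 4, (-1.0, 180): 5,  # keyed on action_pl[1] when action_pl[0] == -1.0
}

def to_one_hot(action_pl):
    key = (action_pl[0], action_pl[3] if action_pl[0] == 1.0 else action_pl[1])
    return list(np.eye(7)[ACTION_INDEX.get(key, 6)])  # index 6 covers the else branch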
def TestTrainer():
    init()
    screen = display.set_mode((WIDTH, HEIGHT))
    surf = Surface((WIDTH, HEIGHT))
    trainerBinding = Trainer(surf)
    # start game, create DQN
    # trainerBinding.initializeGame("ASDFASDF", startAt=Trainer.START_AT_MONSTER_ROOM)
    start(trainerBinding)
    clock = time.Clock()
    actions = 8
    brain = BrainDQN(actions)
    actionList = [
        Trainer.MOVE_LEFT, Trainer.MOVE_RIGHT, Trainer.MOVE_UP, Trainer.MOVE_DOWN,
        Trainer.SHOOT_LEFT, Trainer.SHOOT_RIGHT, Trainer.SHOOT_UP, Trainer.SHOOT_DOWN
    ]

    # initialize first state
    action0 = np.array([1, 0, 0, 0, 0, 0, 0, 0])
    trainerBinding.sendPushKeyEvent(actionList[np.argmax(action0)])
    frameData0 = trainerBinding.advanceFrame()
    observation0 = surfarray.array3d(frameData0.surface)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)
    lastIsaacHP = frameData0.isaac_hp
    lastNumEnemies = frameData0.num_enemies

    # start training
    running = True
    score = 0
    epochs = 0
    winEpoch = 0
    # initialize variables used for visualizations
    totalReward = 0
    numQValues = 0
    totalQValue = 0
    while running:
        # print current epoch and score
        print("epoch", epochs, "/ score", score)
        # a harness for computer input: detects when the x
        # on the window has been clicked and exits accordingly
        for evt in event.get():
            if evt.type == QUIT:
                print("first win epoch", winEpoch)
                running = False
        # get action from the DQN
        action, QValue = brain.getAction()
        totalQValue += QValue
        numQValues += 1
        currentAction = actionList[np.argmax(action)]
        trainerBinding.sendPushKeyEvent(currentAction)
        # advance the simulation
        frameData = trainerBinding.advanceFrame()
        # record position
        pos = np.array(frameData.isaac_position)
        positions.append(np.round(pos, decimals=2))
        # if we entered another room, restart
        if frameData.has_room_changed:
            start(trainerBinding)
        # get hp
        currentIsaacHP = frameData.isaac_hp
        currentNumEnemies = frameData.num_enemies
        # if the boss is dead, restart the game and update score and epochs
        if currentNumEnemies == 0:
            score = score + 1
            epochs = epochs + 1
            if score == 1:
                winEpoch = epochs
            totalRewardsInEpochs.append(totalReward)
            totalReward = 0
            averageQValuesInEpochs.append(totalQValue / numQValues)
            totalQValue = 0
            numQValues = 0
            start(trainerBinding)
            # trainerBinding.initializeGame("ASDFASDF", startAt=Trainer.START_AT_MONSTER_ROOM)
        # get reward
        reward = getReward(currentIsaacHP, lastIsaacHP, currentNumEnemies, lastNumEnemies)
        totalReward += reward
        lastIsaacHP = currentIsaacHP
        lastNumEnemies = currentNumEnemies
        # get terminal state; if terminal, restart the game and update epochs
        terminal = terminalState(trainerBinding.getSimulationStatus())
        if terminal == True:  # or reward == -1:
            epochs = epochs + 1
            totalRewardsInEpochs.append(totalReward)
            totalReward = 0
            averageQValuesInEpochs.append(np.round(totalQValue / numQValues, decimals=2))
            totalQValue = 0
            numQValues = 0
            start(trainerBinding)
            # trainerBinding.initializeGame("ASDFASDF", startAt=Trainer.START_AT_MONSTER_ROOM)
        # train
        nextObservation = surfarray.array3d(frameData.surface)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
        # draw the frame to the window
        screen.blit(frameData.surface, (0, 0))
        display.flip()
        clock.tick(60)
    quit()
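# getReward() and terminalState() are defined elsewhere in that repo. A
# minimal sketch of getReward under the obvious assumption that taking damage
# is penalized and clearing enemies is rewarded; the real shaping terms and
# magnitudes may differ.
def getReward(currentHP, lastHP, currentEnemies, lastEnemies):
    reward = 0.0
    if currentHP < lastHP:
        reward -= 1.0  # hypothetical penalty for taking damage
    if currentEnemies < lastEnemies:
        reward += 1.0  # hypothetical bonus for clearing enemies
    return reward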