def PongDQN():
    GameTime = 0
    GameHistory = []

    # Create our PongGame instance and initialise the game display
    TheGame = MyPong.PongGame()
    TheGame.InitialDisplay()

    # Create our Agent (including the DQN-based brain)
    TheAgent = MyAgent.Agent(STATECOUNT, ACTIONS)

    # Initialise the next action; actions are scalar: 0: stay, 1: up, 2: down
    BestAction = 0

    # Initialise the game state: (PlayerYPos, BallXPos, BallYPos, BallXDirection, BallYDirection)
    GameState = CaptureNormalisedState(200.0, 200.0, 200.0, 1.0, 1.0)

    # =================================================================
    # Main training loop
    for gtime in range(TOTAL_GAMETIME):
        # Periodically refresh the game display
        if GameTime % 100 == 0:
            TheGame.UpdateGameDisplay(GameTime, TheAgent.epsilon)

        # Ask the agent for its next (epsilon-greedy) action
        BestAction = TheAgent.Act(GameState)

        # Apply the recommended action to the game and observe the outcome
        [ReturnScore, PlayerYPos, BallXPos, BallYPos,
         BallXDirection, BallYDirection] = TheGame.PlayNextMove(BestAction)
        NextState = CaptureNormalisedState(PlayerYPos, BallXPos, BallYPos,
                                           BallXDirection, BallYDirection)

        # Capture the sample (S, A, R, S') in the agent's experience replay memory
        TheAgent.CaptureSample((GameState, BestAction, ReturnScore, NextState))

        # Ask the agent to run a DQN training step against its experience
        TheAgent.Process()

        # Move the state and game time on
        GameState = NextState
        GameTime = GameTime + 1

        # Report progress every 200 steps
        if GameTime % 200 == 0:
            print("Timestep: ", GameTime,
                  " Score: ", "{0:.2f}".format(TheGame.GScore),
                  " EPSILON: ", "{0:.4f}".format(TheAgent.epsilon))
            GameHistory.append((GameTime, TheGame.GScore, TheAgent.epsilon))

    # ===============================================
    # End of training loop: plot the score vs game-time profile
    x_val = [x[0] for x in GameHistory]
    y_val = [x[1] for x in GameHistory]
    plt.plot(x_val, y_val)
    plt.xlabel("Game Time")
    plt.ylabel("Score")
    plt.show()
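
# ----------------------------------------------------------------------
# CaptureNormalisedState is called above but not defined in this section.
# A minimal sketch of what it plausibly does, assuming a 400x400 playfield
# (consistent with the 200.0 starting coordinates above) and STATECOUNT = 5;
# the real helper may scale differently.
import numpy as np

def CaptureNormalisedState(PlayerYPos, BallXPos, BallYPos, BallXDirection, BallYDirection):
    GState = np.zeros(STATECOUNT)
    GState[0] = PlayerYPos / 400.0   # scale pixel positions into roughly [0, 1]
    GState[1] = BallXPos / 400.0
    GState[2] = BallYPos / 400.0
    GState[3] = BallXDirection       # directions are already +/-1
    GState[4] = BallYDirection
    return GState
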
def PlayGame():
    GameTime = 0
    GameHistory = []

    # Create our PongGame instance and initialise the game display
    TheGame = MyPong.PongGame()
    TheGame.InitialDisplay()

    # Create our Agent (including the DQN-based brain)
    TheAgent = MyAgent.Agent()
    # Now load the trained model into the Agent
    TheAgent.LoadBestModel()

    # Initialise the next action; actions are scalar: 0: stay, 1: up, 2: down
    BestAction = 0

    # Get an initial state
    [InitialScore, InitialScreenImage] = TheGame.PlayNextMove(BestAction)
    InitialGameImage = ProcessGameImage(InitialScreenImage)

    # Initialise the game state as a stack of four copies of the initial image
    GameState = np.stack((InitialGameImage, InitialGameImage,
                          InitialGameImage, InitialGameImage), axis=2)
    # Keras expects shape (1, 40, 40, 4)
    GameState = GameState.reshape(1, GameState.shape[0], GameState.shape[1], GameState.shape[2])

    # =================================================================
    # Main play loop
    while GameTime < TOTAL_GAMETIME:
        # Periodically refresh the game display
        if GameTime % 100 == 0:
            TheGame.UpdateGameDisplay(GameTime, TheAgent.epsilon)

        # Get the best action from the trained agent
        BestAction = TheAgent.ReturnBestAct(GameState)

        # Apply the recommended action to the game
        [ReturnScore, NewScreenImage] = TheGame.PlayNextMove(BestAction)

        # Process the returned screen image and reshape to (1, 40, 40, 1)
        NewGameImage = ProcessGameImage(NewScreenImage)
        NewGameImage = NewGameImage.reshape(1, NewGameImage.shape[0], NewGameImage.shape[1], 1)

        # Add the new image to the game-state stack, keeping the three most recent previous frames
        NextState = np.append(NewGameImage, GameState[:, :, :, :3], axis=3)

        # Move the state and game time on
        GameState = NextState
        GameTime = GameTime + 1

        # Save the Keras model every 5000 steps
        if GameTime % 5000 == 0:
            TheAgent.SaveWeights()

        if GameTime % 25 == 0:
            print("Game Time: ", GameTime,
                  " Game Score: ", "{0:.2f}".format(TheGame.GScore),
                  " EPSILON: ", "{0:.4f}".format(TheAgent.epsilon))
            GameHistory.append((GameTime, TheGame.GScore, TheAgent.epsilon))
            # Write the play progress to file
            GFile = open('PlayHistory.dat', 'wb')
            pickle.dump(GameHistory, GFile)
            GFile.close()

    # ===============================================
    # Game completed, so display the final scores graph
    GFile = open('PlayHistory.dat', 'rb')
    PlayHistory = pickle.load(GFile)
    GFile.close()

    # Plot the score vs play-time profile
    x_val = [x[0] for x in PlayHistory]
    y_val = [x[1] for x in PlayHistory]
    plt.plot(x_val, y_val)
    plt.xlabel("Play Time")
    plt.ylabel("Score")
    plt.show()
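
# ----------------------------------------------------------------------
# ProcessGameImage is called above but not defined in this section. A
# minimal sketch, assuming scikit-image is available and a 40x40 CNN input
# as the reshape comments above suggest; the real helper may crop or
# rescale intensities differently.
import numpy as np
import skimage.color
import skimage.transform

def ProcessGameImage(RawImage):
    GreyImage = skimage.color.rgb2gray(RawImage)                  # drop the colour channels
    ReducedImage = skimage.transform.resize(GreyImage, (40, 40))  # downsample to the CNN input size
    return ReducedImage.astype(np.float32)                        # intensities already in [0, 1]
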
def PlayExperiment():
    GameTime = 0
    GameHistory = []

    # Create our PongGame instance and initialise the game display
    TheGame = MyPong.PongGame()
    TheGame.InitialDisplay()

    # Create our Agent (including the DQN-based brain)
    TheAgent = MyAgent.Agent(STATECOUNT, ACTIONS)

    # Initialise the next action; actions are scalar: 0: stay, 1: up, 2: down
    BestAction = 0

    # Initialise the game state; the exact starting values are believed to be insignificant:
    # (PlayerYPos, BallXPos, BallYPos, BallXDirection, BallYDirection)
    GameState = CaptureNormalisedState(200.0, 200.0, 200.0, 1.0, 1.0)

    # =================================================================
    # Main experiment loop
    for gtime in range(TOTAL_GAMETIME):
        # Periodically refresh the game display
        if GameTime % 100 == 0:
            TheGame.UpdateGameDisplay(GameTime, TheAgent.epsilon)

        # Determine the next action from the agent
        BestAction = TheAgent.Act(GameState)

        # =================
        # Uncomment this block to test the game engine: the player paddle then
        # acts the same way as the programmed right-hand player.
        # # Get the current game state
        # [PlayerYPos, BallXPos, BallYPos, BallXDirection, BallYDirection] = TheGame.ReturnCurrentState()
        # # Move up if the ball is higher than the player paddle
        # if (PlayerYPos + 30 > BallYPos + 5):
        #     BestAction = 1
        # # Move down if the ball is lower than the player paddle
        # if (PlayerYPos + 30 < BallYPos + 5):
        #     BestAction = 2
        # =============================

        # Apply the recommended action to the game and observe the outcome
        [ReturnScore, PlayerYPos, BallXPos, BallYPos,
         BallXDirection, BallYDirection] = TheGame.PlayNextMove(BestAction)
        NextState = CaptureNormalisedState(PlayerYPos, BallXPos, BallYPos,
                                           BallXDirection, BallYDirection)

        # Capture the sample (S, A, R, S') in the agent's experience replay memory
        TheAgent.CaptureSample((GameState, BestAction, ReturnScore, NextState))

        # Ask the agent to run a DQN training step against its experience
        TheAgent.Process()

        # Move the state and game time on
        GameState = NextState
        GameTime = GameTime + 1

        # Every 1000 steps the Keras model could be saved here (a no-op in this version)

        # Report progress every 200 steps
        if GameTime % 200 == 0:
            print("Game Time: ", GameTime,
                  " Game Score: ", "{0:.2f}".format(TheGame.GScore),
                  " EPSILON: ", "{0:.4f}".format(TheAgent.epsilon))
            GameHistory.append((GameTime, TheGame.GScore, TheAgent.epsilon))

    # ===============================================
    # End of game loop, so plot the score vs game-time profile
    x_val = [x[0] for x in GameHistory]
    y_val = [x[1] for x in GameHistory]
    plt.plot(x_val, y_val)
    plt.xlabel("Game Time")
    plt.ylabel("Score")
    plt.show()
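
# ----------------------------------------------------------------------
# Sketch of the Agent interface exercised by the loops above. Only the
# method names and call signatures come from this section; the epsilon-greedy
# body, the replay capacity, and the PredictQ helper are illustrative
# assumptions, not the actual MyAgent internals.
import random
from collections import deque

import numpy as np

class AgentSketch:
    def __init__(self, StateCount, Actions):
        self.StateCount = StateCount
        self.Actions = Actions
        self.epsilon = 1.0                  # exploration rate, decayed during training
        self.memory = deque(maxlen=100000)  # experience replay buffer (assumed capacity)

    def PredictQ(self, State):
        # Stand-in for a Keras model.predict() call on the Q-network.
        return np.zeros(self.Actions)

    def Act(self, State):
        # Epsilon-greedy: explore with probability epsilon, otherwise take
        # the action with the highest estimated Q-value.
        if random.random() < self.epsilon:
            return random.randint(0, self.Actions - 1)
        return int(np.argmax(self.PredictQ(State)))

    def CaptureSample(self, Sample):
        # Store the (S, A, R, S') tuple for experience replay.
        self.memory.append(Sample)

    def Process(self):
        # One DQN training step would go here: sample a minibatch from
        # memory, fit the Q-network toward r + gamma * max_a' Q(S', a'),
        # and decay epsilon.
        pass
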
def TrainExperiment():
    TrainTime = 0
    TrainHistory = []
    ScoreCheck = deque()
    NotQuit = True

    # Initialise the game: create our ForexGame instance
    TheGame = MyForex.ForexGame()

    # Create our Agent (including the DQN-based brain)
    TheAgent = MyAgent.Agent()

    # Initialise the next action; actions are scalar: 0: stay, 1: up, 2: down
    BestAction = 0

    # Get an initial state
    [InitialScore, InitialScreenImage] = TheGame.PlayNextMove(BestAction)
    InitialGameImage = ProcessGameImage(InitialScreenImage)

    # Initialise the game state as a stack of four copies of the initial image
    GameState = np.stack((InitialGameImage, InitialGameImage,
                          InitialGameImage, InitialGameImage), axis=2)
    # Keras expects shape (1, 80, 80, 4) (modified from the original (1, 40, 40, 4))
    GameState = GameState.reshape(1, GameState.shape[0], GameState.shape[1], GameState.shape[2])

    # =================================================================
    # Main experiment loop: run over the data
    while TheGame.current_data_position < len(TheGame.data) and NotQuit:
        # Determine the next action from the agent
        BestAction = TheAgent.FindBestAct(GameState)

        # Apply the recommended action to the game
        [ReturnScore, NewScreenImage] = TheGame.PlayNextMove(BestAction)

        # Process the returned screen image and reshape to (1, 80, 80, 1)
        # (modified from the original (1, 40, 40, 1))
        NewGameImage = ProcessGameImage(NewScreenImage)
        NewGameImage = NewGameImage.reshape(1, NewGameImage.shape[0], NewGameImage.shape[1], 1)

        # Add the new image to the game-state stack, keeping the three most recent previous frames
        NextState = np.append(NewGameImage, GameState[:, :, :, :3], axis=3)

        # Capture the sample (S, A, R, S') in the agent's experience replay memory
        TheAgent.CaptureSample((GameState, BestAction, ReturnScore, NextState))

        # Ask the agent to run a DQN training step against its experience
        TheAgent.Process()

        # Move the state and train time on
        GameState = NextState
        TrainTime = TrainTime + 1

        # Save the Keras model every 5000 steps
        if TrainTime % 5000 == 0:
            TheAgent.SaveWeights()

        if TrainTime % 100 == 0:
            print("Train Time: ", TrainTime,
                  " Game Score: ", "{0:.2f}".format(TheGame.GScore),
                  " EPSILON: ", "{0:.4f}".format(TheAgent.epsilon))
            TrainHistory.append((TrainTime, TheGame.GScore, TheAgent.epsilon))
            # Write the training progress to file
            GFile = open('TrainHistory.dat', 'wb')
            pickle.dump(TrainHistory, GFile)
            GFile.close()

            # Queue up the last SCORELENGTH scores to check for good performance
            ScoreCheck.append(TheGame.GScore)
            if len(ScoreCheck) > SCORELENGTH:
                ScoreCheck.popleft()

            # If the average recent score exceeds the target (9.75), assume
            # peak performance has been reached
            SSum = 0.0
            for ScoreItem in ScoreCheck:
                SSum = SSum + ScoreItem
            if SSum / SCORELENGTH > TARGET_SCORE_TO_END_GAME:
                print("Achieved Good Performance, Saving Best Model")
                TheAgent.SaveBestWeights()
                # Complete the game loop
                NotQuit = False

    # ===============================================
    # Training completed, so display the final scores graph
    GFile = open('TrainHistory.dat', 'rb')
    TrainHistory = pickle.load(GFile)
    GFile.close()

    # Plot the score vs train-time profile
    x_val = [x[0] for x in TrainHistory]
    y_val = [x[1] for x in TrainHistory]
    plt.plot(x_val, y_val)
    plt.xlabel("Train Time")
    plt.ylabel("Score")
    plt.show()
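
# ----------------------------------------------------------------------
# None of the module-level names used above (STATECOUNT, ACTIONS,
# TOTAL_GAMETIME, SCORELENGTH, TARGET_SCORE_TO_END_GAME, MyPong, MyAgent,
# MyForex, np, plt, pickle, deque) are defined in this section. A plausible
# header follows; every value is an assumption except TARGET_SCORE_TO_END_GAME,
# which the "greater than 9.75" comment above pins down.
import pickle
from collections import deque

import numpy as np
import matplotlib.pyplot as plt

import MyPong    # Pong game engine driven by the loops above
import MyAgent   # DQN agent wrapper
import MyForex   # Forex "game" used by TrainExperiment()

STATECOUNT = 5                   # length of the normalised state vector
ACTIONS = 3                      # 0: stay, 1: up, 2: down
TOTAL_GAMETIME = 100000          # assumed training/playing horizon
SCORELENGTH = 100                # assumed window of recent scores
TARGET_SCORE_TO_END_GAME = 9.75  # average score that triggers the early stop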