Example #1
0
def PongDQN():
    """Run the Pong agent loop for TOTAL_GAMETIME steps and plot the score.

    Each step asks the agent for an action, applies it to the game, hands
    the (state, action, reward, next state) sample to the agent and calls the
    agent's ``Process`` step (presumably the DQN training update -- confirm
    against MyAgent). The game display is refreshed every 100 steps; every
    200 completed steps the score and epsilon are printed and recorded.
    After the loop the recorded scores are plotted against game time.
    """
    GameHistory = []

    # Create our PongGame instance and initialise its display.
    TheGame = MyPong.PongGame()
    TheGame.InitialDisplay()

    TheAgent = MyAgent.Agent(STATECOUNT, ACTIONS)

    # Arbitrary starting state; arguments are ordered as
    # (PlayerYPos, BallXPos, BallYPos, BallXDirection, BallYDirection).
    GameState = CaptureNormalisedState(200.0, 200.0, 200.0, 1.0, 1.0)

    for Step in range(TOTAL_GAMETIME):
        # The display is refreshed using the count of steps completed so far.
        if Step % 100 == 0:
            TheGame.UpdateGameDisplay(Step, TheAgent.epsilon)

        BestAction = TheAgent.Act(GameState)

        (ReturnScore, PlayerYPos, BallXPos, BallYPos, BallXDirection,
         BallYDirection) = TheGame.PlayNextMove(BestAction)
        NextState = CaptureNormalisedState(PlayerYPos, BallXPos, BallYPos,
                                           BallXDirection, BallYDirection)

        TheAgent.CaptureSample((GameState, BestAction, ReturnScore, NextState))
        TheAgent.Process()

        GameState = NextState

        # Progress is reported against the number of steps now completed.
        GameTime = Step + 1
        if GameTime % 200 == 0:
            print("Timestep: ", GameTime, " Score: ",
                  "{0:.2f}".format(TheGame.GScore), "   EPSILON: ",
                  "{0:.4f}".format(TheAgent.epsilon))
            GameHistory.append((GameTime, TheGame.GScore, TheAgent.epsilon))

    # Plot the recorded score against game time.
    x_val = [Entry[0] for Entry in GameHistory]
    y_val = [Entry[1] for Entry in GameHistory]

    plt.plot(x_val, y_val)
    plt.xlabel("Game Time")
    plt.ylabel("Score")
    plt.show()
def PlayGame():
    """Play Pong with the agent's saved best model and plot the score history.

    The agent's best model is loaded, then for TOTAL_GAMETIME steps the agent
    picks an action from a stack of the four most recent processed screen
    images. The display is refreshed every 100 steps, ``SaveWeights`` is
    called every 5000 steps, and every 25 steps the score is printed,
    recorded and the whole history is pickled to ``PlayHistory.dat``.

    The final plot is drawn from the in-memory history rather than by
    re-reading ``PlayHistory.dat``: the file only exists once 25 steps have
    been played, so reading it back could fail or show a previous run.
    """
    GameTime = 0
    GameHistory = []

    # Create our PongGame instance and initialise its display.
    TheGame = MyPong.PongGame()
    TheGame.InitialDisplay()

    # Create our Agent and load the trained model into it.
    TheAgent = MyAgent.Agent()
    TheAgent.LoadBestModel()

    # Initial action. Action is assumed scalar: 0: stay, 1: up, 2: down.
    BestAction = 0

    # Get an initial screen image; the score of this first move is not used.
    _, InitialScreenImage = TheGame.PlayNextMove(BestAction)
    InitialGameImage = ProcessGameImage(InitialScreenImage)

    # Initialise the game state as a stack of four copies of the initial
    # image, then add a leading batch axis (original note: Keras expects
    # shape 1x40x40x4).
    GameState = np.stack((InitialGameImage,) * 4, axis=2)
    GameState = GameState.reshape(1, GameState.shape[0], GameState.shape[1],
                                  GameState.shape[2])

    # =================================================================
    # Main play loop
    while GameTime < TOTAL_GAMETIME:

        # First just update the game display.
        if GameTime % 100 == 0:
            TheGame.UpdateGameDisplay(GameTime, TheAgent.epsilon)

        # Get the best action from the agent and apply it to the game.
        BestAction = TheAgent.ReturnBestAct(GameState)
        _, NewScreenImage = TheGame.PlayNextMove(BestAction)

        # Process the returned screen image and reshape it to
        # (1, height, width, 1) so it can be stacked on the channel axis.
        NewGameImage = ProcessGameImage(NewScreenImage)
        NewGameImage = NewGameImage.reshape(1, NewGameImage.shape[0],
                                            NewGameImage.shape[1], 1)

        # The new image goes first, followed by the three most recent images
        # of the previous state; the oldest image is dropped.
        GameState = np.append(NewGameImage, GameState[:, :, :, :3], axis=3)

        # Move the game time on.
        GameTime = GameTime + 1

        # Save the model weights every 5000 steps.
        if GameTime % 5000 == 0:
            TheAgent.SaveWeights()

        if GameTime % 25 == 0:
            print("Game Time: ", GameTime, "  Game Score: ",
                  "{0:.2f}".format(TheGame.GScore), "   EPSILON: ",
                  "{0:.4f}".format(TheAgent.epsilon))
            GameHistory.append((GameTime, TheGame.GScore, TheAgent.epsilon))

            # Write the play progress to file; the context manager closes
            # the file even if pickling fails.
            with open('PlayHistory.dat', 'wb') as GFile:
                pickle.dump(GameHistory, GFile)

    # ===============================================
    # Game completed, so plot the score vs game time profile.
    x_val = [Entry[0] for Entry in GameHistory]
    y_val = [Entry[1] for Entry in GameHistory]

    plt.plot(x_val, y_val)
    plt.xlabel("Play Time")
    plt.ylabel("Score")
    plt.show()
Example #3
0
def PlayExperiment():
    """Run the Pong DQN experiment for TOTAL_GAMETIME steps and plot the score.

    Each step asks the agent for an action, applies it to the game, stores
    the [S, A, R, S'] sample in the agent's experience replay memory and
    requests a DQN training pass from the agent. The display is refreshed
    every 100 steps; every 200 completed steps the score and epsilon are
    printed and recorded. After the loop the recorded scores are plotted
    against game time.
    """
    GameHistory = []

    # Create our PongGame instance and initialise its display.
    TheGame = MyPong.PongGame()
    TheGame.InitialDisplay()

    # Create our Agent (including DQN based Brain).
    TheAgent = MyAgent.Agent(STATECOUNT, ACTIONS)

    # Initialise the current game state; the starting values are believed
    # insignificant:
    # (PlayerYPos, BallXPos, BallYPos, BallXDirection, BallYDirection)
    GameState = CaptureNormalisedState(200.0, 200.0, 200.0, 1.0, 1.0)

    # =================================================================
    # Main experiment loop
    for Step in range(TOTAL_GAMETIME):

        # First just update the game display (steps completed so far).
        if Step % 100 == 0:
            TheGame.UpdateGameDisplay(Step, TheAgent.epsilon)

        # Determine the next action from the agent.
        # Action is assumed scalar: 0: stay, 1: up, 2: down.
        BestAction = TheAgent.Act(GameState)

        # =================
        # Uncomment this to test the game engine: the player paddle then acts
        # the same way as the right-hand programmed player.
        # Get the current game state
        #[PlayerYPos, BallXPos, BallYPos, BallXDirection, BallYDirection] = TheGame.ReturnCurrentState()

        # Move up if the ball is higher than the opponent paddle
        #if (PlayerYPos + 30 > BallYPos + 5):
        #    BestAction = 1
        # Move down if the ball is lower than the opponent paddle
        #if (PlayerYPos + 30 < BallYPos + 5):
        #    BestAction = 2
        # =============================

        # Now apply the recommended action to the game.
        (ReturnScore, PlayerYPos, BallXPos, BallYPos, BallXDirection,
         BallYDirection) = TheGame.PlayNextMove(BestAction)
        NextState = CaptureNormalisedState(PlayerYPos, BallXPos, BallYPos,
                                           BallXDirection, BallYDirection)

        # Capture the sample [S, A, R, S'] in the agent's experience replay
        # memory.
        TheAgent.CaptureSample((GameState, BestAction, ReturnScore, NextState))

        # Now request the agent to run its DQN training process against the
        # stored experience.
        TheAgent.Process()

        # Move the state on.
        GameState = NextState

        # Progress is reported against the number of steps now completed.
        GameTime = Step + 1
        if GameTime % 200 == 0:
            print("Game Time: ", GameTime, "  Game Score: ",
                  "{0:.2f}".format(TheGame.GScore), "   EPSILON: ",
                  "{0:.4f}".format(TheAgent.epsilon))
            GameHistory.append((GameTime, TheGame.GScore, TheAgent.epsilon))

    # ===============================================
    # End of game loop, so plot the score vs game time profile.
    x_val = [Entry[0] for Entry in GameHistory]
    y_val = [Entry[1] for Entry in GameHistory]

    plt.plot(x_val, y_val)
    plt.xlabel("Game Time")
    plt.ylabel("Score")
    plt.show()
Example #4
0
def TrainExperiment():
    """Train the DQN agent on the Forex game and plot the score history.

    The loop runs while the game's ``current_data_position`` is below
    ``len(TheGame.data)``. Each step asks the agent for an action, applies
    it, stores the [S, A, R, S'] sample in the agent's experience replay
    memory and requests a DQN training pass. ``SaveWeights`` is called every
    5000 steps. Every 100 steps the score is printed, recorded and the whole
    history is pickled to ``TrainHistory.dat``; the last SCORELENGTH recorded
    scores are then checked and, once their sum divided by SCORELENGTH
    exceeds TARGET_SCORE_TO_END_GAME, the best weights are saved and
    training stops.

    The final plot is drawn from the in-memory history rather than by
    re-reading ``TrainHistory.dat``: the file only exists once 100 steps have
    run, so reading it back could fail or show a previous run.
    """
    TrainTime = 0
    TrainHistory = []

    # Sliding window of the most recently recorded scores.
    ScoreCheck = deque()

    # Create our ForexGame instance.
    TheGame = MyForex.ForexGame()

    # Create our Agent (including DQN based Brain).
    TheAgent = MyAgent.Agent()

    # Initial action. Action is assumed scalar: 0: stay, 1: up, 2: down.
    BestAction = 0

    # Get an initial screen image; the score of this first move is not used.
    _, InitialScreenImage = TheGame.PlayNextMove(BestAction)
    InitialGameImage = ProcessGameImage(InitialScreenImage)

    # Initialise the game state as a stack of four copies of the initial
    # image, then add a leading batch axis (original note: Keras expects
    # shape 1x80x80x4, previously 1x40x40x4).
    GameState = np.stack((InitialGameImage,) * 4, axis=2)
    GameState = GameState.reshape(1, GameState.shape[0], GameState.shape[1],
                                  GameState.shape[2])

    # =================================================================
    # Main experiment loop: loop over the game data.
    while TheGame.current_data_position < len(TheGame.data):
        # Determine the next action from the agent.
        BestAction = TheAgent.FindBestAct(GameState)

        # Now apply the recommended action to the game.
        ReturnScore, NewScreenImage = TheGame.PlayNextMove(BestAction)

        # Process the returned screen image and reshape it to
        # (1, height, width, 1) so it can be stacked on the channel axis.
        NewGameImage = ProcessGameImage(NewScreenImage)
        NewGameImage = NewGameImage.reshape(1, NewGameImage.shape[0],
                                            NewGameImage.shape[1], 1)

        # The new image goes first, followed by the three most recent images
        # of the previous state; the oldest image is dropped.
        NextState = np.append(NewGameImage, GameState[:, :, :, :3], axis=3)

        # Capture the sample [S, A, R, S'] in the agent's experience replay
        # memory.
        TheAgent.CaptureSample((GameState, BestAction, ReturnScore, NextState))

        # Now request the agent to run its DQN training process against the
        # stored experience.
        TheAgent.Process()

        # Move the state and the training time on.
        GameState = NextState
        TrainTime = TrainTime + 1

        # Save the model weights every 5000 steps.
        if TrainTime % 5000 == 0:
            TheAgent.SaveWeights()

        # Everything below only happens on every 100th step.
        if TrainTime % 100 != 0:
            continue

        print("Train Time: ", TrainTime, "  Game Score: ",
              "{0:.2f}".format(TheGame.GScore), "   EPSILON: ",
              "{0:.4f}".format(TheAgent.epsilon))
        TrainHistory.append((TrainTime, TheGame.GScore, TheAgent.epsilon))

        # Write the progress to file; the context manager closes the file
        # even if pickling fails.
        with open('TrainHistory.dat', 'wb') as GFile:
            pickle.dump(TrainHistory, GFile)

        # Keep only the last SCORELENGTH recorded scores.
        ScoreCheck.append(TheGame.GScore)
        if len(ScoreCheck) > SCORELENGTH:
            ScoreCheck.popleft()

        # The sum is divided by SCORELENGTH, not by the number of scores
        # collected so far, so a partially filled window is averaged as if
        # the missing scores were zero.
        if sum(ScoreCheck) / SCORELENGTH > TARGET_SCORE_TO_END_GAME:
            print("Achieved Good Performance, Saving Best Model")
            TheAgent.SaveBestWeights()
            # Complete the game loop.
            break

    # ===============================================
    # Training completed, so plot the score vs game time profile.
    x_val = [Entry[0] for Entry in TrainHistory]
    y_val = [Entry[1] for Entry in TrainHistory]

    plt.plot(x_val, y_val)
    plt.xlabel("Game Time")
    plt.ylabel("Score")
    plt.show()