# NOTE(review): this chunk starts mid-script — `i`, `a`, `batch_y`, `loss`,
# `BATCH_SIZE`, `Network`, `np`, `plt`, `gfx`, and `png` are all defined above
# this excerpt (the first two statements look like the tail of a training
# loop). Line breaks and indentation below are reconstructed; confirm the
# nesting against the full file.
print(loss[-1])  # most recent loss value
if (i % 20 == 0):
    # Every 20 epochs: report batch classification error.
    YY = np.argmax(a, axis=1)  # predicted class per sample (argmax over network outputs)
    error = BATCH_SIZE - sum(np.equal(batch_y, YY))  # number of misclassified samples
    print(i, "epochs. Error:", error)
    print("# 2s:", np.sum(np.equal(YY, 2)))  # how often class 2 is predicted
print("Training Done. Loss =", loss)
#a = Network.forward(X)

# Plot the loss curve.
plt.figure()
plt.plot(loss)
plt.title('Loss over epochs')

# Evaluation: run the trained network against the pong simulator.
window = gfx.GFX()
window.fps = 10e4 # you can modify this for debugging purposes, default=30
Game = png.PongModel(0.5, 0.5, 0.03, 0.01, 0.4) # initialize state
games = 0
totalscore = 0
while 1: # main loop
    if (window._open == False):
        break
    state = Game.get_state()
    # state = np.divide(state-means, stds).tolist()
    actionlist = Network.forward(state)  # one score per candidate action
    # Select the index of the highest-scoring action (manual argmax).
    maxidx = 0
    for i in range(1, len(actionlist)):
        if (actionlist[i] > actionlist[maxidx]):
            maxidx = i
    # NOTE(review): the chunk is cut off here — applying the chosen action
    # (and updating games/totalscore) happens below this excerpt.
# NOTE(review): chunk starts mid-file — the statement below is the tail of a
# `DownKey(Game)` handler whose `def` line sits above this excerpt
# (UpKey/DownKey move player 1; see the key bindings in __main__).
Game.move_down()

def UpKey2(Game):
    # move player 2
    Game.move_up(2)

def DownKey2(Game):
    # Move player 2's paddle down.
    Game.move_down(2)

if __name__ == '__main__':
    # Two-player layout: wall at x=0, paddles at x=1.
    window = gfx.GFX(wall_x=0, player_x=1, players=2)
    #window.fps = 5 # you can modify this for debugging purposes, default=30
    Game = png.PongModel(0.5, 0.5, 0.03, 0.01, 0.4, paddleX=1) # initialize state
    Game.init2(0.4, 0)  # initialize the second player's state
    #################################################### Ignore this
    # Keyboard controls: arrow keys drive player 1, w/s drive player 2.
    window.win.bind('<Up>', lambda eff: UpKey(Game))
    window.win.bind('<Down>', lambda eff: DownKey(Game))
    window.win.bind('w', lambda eff: UpKey2(Game))
    window.win.bind('s', lambda eff: DownKey2(Game))
    ######################################################## /ignore
    while 1: # main loop
        if (window._open == False):
            break
        Game.update(window)
if __name__ == '__main__':
    # Behavioral-cloning pipeline: load expert data, (re)train the network,
    # report training-set accuracy, then evaluate by simulating games.
    # NOTE(review): load_data, Network, accuracy, simulate_game, pm, and gfx
    # are defined/imported above this chunk.

    # Load data
    X, y = load_data('./data/expert_q.txt')

    # Initialize the model
    nn = Network(hidden_layers=(256, 256, 256, 256), lr=9e-2, epoch=600,
                 bias=True, batch_size=64)
    try:
        # Resume from pre-trained weights when a checkpoint exists.
        nn.load_weights()
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; catch Exception so Ctrl-C still interrupts here.
        print('WARNING: No pre-trained networks!')

    # Training
    nn.train(X, y)
    nn.save_weights()

    # Display accuracy
    accuracy(nn, X, y)
    print()

    # Simulate 1,000 games
    N = 1000
    environment = pm.PongModel(0.5, 0.5, 0.03, 0.01, 0.4)
    window = gfx.GFX()
    window.fps = 4e16  # effectively uncapped frame rate: simulate as fast as possible
    avg_score = simulate_game(N, window, environment, nn)
    print('Avg score of {0} games:{1:.3f}'.format(N, avg_score))
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 26 00:02:49 2018

@author: Shiratori
"""

import graphics as gfx
import pong_model as pm
from q_learning import QLearning

if __name__ == '__main__':
    # Mirrored field for agent B: wall at x=1, paddle defending x=0, and the
    # ball's initial x-velocity pointing left (-0.03).
    environment = pm.PongModel(0.5, 0.5, -0.03, 0.01, 0.4, paddleX=0)
    window = gfx.GFX(wall_x=1, player_x=0)
    window.fps = 9e16  # effectively uncapped frame rate for fast evaluation

    # Model configuration — explore/threshold of -1 together with mode='test'
    # presumably disable exploration and load the saved Q-table; verify
    # against the QLearning implementation.
    q_config = dict(
        C=5e3,
        gamma=0.99,
        explore=-1,
        threshold=-1,
        log=True,
        log_file='q_test_log_(agent_B).txt',
        mode='test',
        q_table_file='q_q_table_(agent_B).csv',
    )
    model = QLearning(environment, window, **q_config)

    # Run the evaluation loop.
    model.train()
# -*- coding: utf-8 -*- """ Created on Thu Apr 26 00:02:49 2018 @author: Shiratori """ import graphics as gfx import pong_model as pm from q_learning import QLearning if __name__ == '__main__': # Set up environment environment = pm.PongModel( 0.5, 0.5, -0.03, 0.01, 0.5, paddleX=0 ) #initial state changed; defending line on the left. also, paddle now on left, so paddleX changed window = gfx.GFX(wall_x=1, player_x=0) #switched layout window.fps = 9e16 # Set up model model = QLearning(environment, window, C=5e3, gamma=0.99, explore=-1, threshold=-1, log=True, log_file='q_test_log_(1_1)_A.txt', mode='test', q_table_file='q_q_table_(1_1)_A.csv')