Network.backward(batch_y) loss.append(Network.update_weights(5 * 0.95**i)) print(loss[-1]) if (i % 20 == 0): YY = np.argmax(a, axis=1) error = BATCH_SIZE - sum(np.equal(batch_y, YY)) print(i, "epochs. Error:", error) print("# 2s:", np.sum(np.equal(YY, 2))) print("Training Done. Loss =", loss) #a = Network.forward(X) plt.figure() plt.plot(loss) plt.title('Loss over epochs') window = gfx.GFX() window.fps = 10e4 # you can modify this for debugging purposes, default=30 Game = png.PongModel(0.5, 0.5, 0.03, 0.01, 0.4) # initialize state games = 0 totalscore = 0 while 1: # main loop if (window._open == False): break state = Game.get_state() # state = np.divide(state-means, stds).tolist() actionlist = Network.forward(state) maxidx = 0 for i in range(1, len(actionlist)):
Network.backward(batch_y) dW = Network.dW W = Network.weights # print(W[-1][0:5]) aa = Network.a # print("dW", Network.dW) loss.append(Network.update_weights(5 * 0.95**i)) print(loss[-1]) YY = np.argmax(a, axis=1) error.append(BATCH_SIZE - sum(np.equal(batch_y, YY))) if (i % 20 == 0): print(i, "epochs. Error:", error[-1]) print("Training Done. Loss =", loss[-1]) window = gfx.GFX(players=2) window.fps = 30 # you can modify this for debugging purposes, default=30 Game = png.PongModel(0.5, 0.5, 0.03, 0.01, 0.4) # initialize state Game.init2(0.4, 0) window.win.bind('<Up>', lambda eff: UpKey(Game)) window.win.bind('<Down>', lambda eff: DownKey(Game)) games = 0 totalscore = 0 while 1: # main loop if (window._open == False): break state = Game.get_state() # get the values of relevant variables state = np.divide(state - means, stds).tolist() # normalize data
def DownKey(Game):
    """Key handler: call ``Game.move_down()`` with no player argument."""
    Game.move_down()


def UpKey2(Game):
    """Key handler that moves player 2 up by calling ``Game.move_up(2)``."""
    Game.move_up(2)


def DownKey2(Game):
    """Key handler that calls ``Game.move_down(2)`` (counterpart of ``UpKey2``)."""
    Game.move_down(2)


if __name__ == '__main__':
    window = gfx.GFX(wall_x=0, player_x=1, players=2)
    # window.fps = 5  # you can modify this for debugging purposes, default=30
    Game = png.PongModel(0.5, 0.5, 0.03, 0.01, 0.4, paddleX=1)  # initialize state
    Game.init2(0.4, 0)

    #################################################### Ignore this
    # Arrow keys drive the handlers without a player argument; 'w'/'s' drive
    # the handlers that pass 2 to the model.
    window.win.bind('<Up>', lambda eff: UpKey(Game))
    window.win.bind('<Down>', lambda eff: DownKey(Game))
    window.win.bind('w', lambda eff: UpKey2(Game))
    window.win.bind('s', lambda eff: DownKey2(Game))
    ######################################################## /ignore

    while True:  # main loop
        # Kept as an equality test (not `not window._open`) because the type
        # of `_open` is not visible from here.
        if window._open == False:  # noqa: E712
            break
"""
Created on Thu Apr 26 00:02:49 2018

@author: Shiratori
"""

import graphics as gfx
import pong_model as pm
from q_learning import QLearning

if __name__ == '__main__':
    # Set up environment.
    # Initial state changed: defending line on the left. The paddle is now on
    # the left as well, so paddleX changed.
    environment = pm.PongModel(0.5, 0.5, -0.03, 0.01, 0.5, paddleX=0)
    window = gfx.GFX(wall_x=1, player_x=0)  # switched layout
    window.fps = 9e16  # NOTE(review): presumably an effectively uncapped frame rate - confirm

    # Set up model
    model = QLearning(
        environment,
        window,
        C=5e3,
        gamma=0.99,
        explore=-1,
        threshold=-1,
        log=True,
        log_file='q_test_log_(1_1)_A.txt',
        mode='test',
        q_table_file='q_q_table_(1_1)_A.csv',
    )

    # Training