def ql_vs_minmax(visualise):
    """Pit a pre-trained Q-learning agent against a pre-trained minimax-Q
    agent for 5000 games and tally wins for each side.

    Args:
        visualise: when truthy, render every step with a Visualiser and
            reset the display at the end of each game.

    Returns:
        Tuple ``(ql_wins, minmax_wins)`` — games won by the Q-learning
        player (reward == 1) and by the minimax-Q player (reward == -1).
    """
    print("ql vs minmax ql")
    num_actions = env.n_actions
    draw_probability = 0.1
    # Decay schedule matching the one used when the minimax-Q player was
    # trained (depends on module-level N_EPISODES).
    decay = 10 ** (-2. / N_EPISODES * 0.05)
    if visualise:
        vis = Visualiser(env, 80)

    ql_wins = 0
    minmax_wins = 0

    playerA = QLearn(actions=list(range(num_actions)), reward_decay=0.7)
    playerB = MinimaxQPlayer(num_actions, num_actions, decay=decay,
                             expl=0.01, gamma=1 - draw_probability)
    playerA.load_Qtable('saved_players/QR')
    playerB.load_Qtable("MR")  # no explore

    iterations = 5000
    for episode in range(iterations):
        # Initial observation for this game.
        observation = env.reset()
        if episode % 100 == 0:
            print(str(float(episode) / iterations * 100) + "%")
        while True:
            # Both agents choose simultaneously from the same observation.
            actionA = playerA.choose_action(str(observation))
            actionB = playerB.choose_action(str(observation))
            # Step the environment with the joint action.
            observation_, reward, done = env.step(actionA, actionB)
            # Reward is from player A's perspective: +1 A wins, -1 B wins.
            if reward == 1:
                ql_wins += 1
            elif reward == -1:
                minmax_wins += 1
            observation = observation_
            if visualise:
                vis.update_canvas(env)
            if done:
                if visualise:
                    vis.reset()
                break
    return (ql_wins, minmax_wins)
def run_optimal():
    """Replay 500 games between two pre-trained Q-learning agents with full
    visualisation, then print the environment's win count and tear down the
    display."""
    vis = Visualiser(env, 80)
    n_actions = env.n_actions

    playerA = QLearn(actions=list(range(n_actions)))
    playerA.load_Qtable("saved_players/QR")
    playerB = QLearn(actions=list(range(n_actions)))
    playerB.load_Qtable("saved_players/QR_base")

    for _episode in range(500):
        observation = env.reset()
        vis.update_canvas(env)
        done = False
        while not done:
            # Both players act on the same stringified state.
            state_key = str(observation)
            actionA = playerA.choose_action(state_key)
            actionB = playerB.choose_action(state_key)
            observation, reward, done = env.step(actionA, actionB)
            vis.update_canvas(env)
        vis.reset()

    print("Games won: " + str(env.win_count))
    vis.destroy()
def run_optimalB():
    """Replay 20 games of a pre-trained Q-learning agent against a
    pre-trained minimax-Q agent (expl=0.00, so the minimax player never
    explores), rendering every step, then print the environment's win count
    and tear down the display."""
    num_actions = env.n_actions
    draw_probability = 0.1
    # Decay schedule matching the one used during training (depends on
    # module-level N_EPISODES).
    decay = 10 ** (-2. / N_EPISODES * 0.05)
    vis = Visualiser(env, 80)

    playerB = MinimaxQPlayer(num_actions, num_actions, decay=decay,
                             expl=0.00, gamma=1 - draw_probability)
    playerB.load_Qtable("MR")
    playerA = QLearn(actions=list(range(num_actions)))
    playerA.load_Qtable("saved_players/MR_base")

    for episode in range(20):
        observation = env.reset()
        vis.update_canvas(env)
        while True:
            # Both agents choose simultaneously from the same observation.
            actionA = playerA.choose_action(str(observation))
            actionB = playerB.choose_action(str(observation))
            observation_, reward, done = env.step(actionA, actionB)
            observation = observation_
            vis.update_canvas(env)
            if done:
                vis.reset()
                break

    print("Games won: " + str(env.win_count))
    vis.destroy()