def run_optimal():
    # Replay games between two pre-trained Q-learning agents using their
    # saved Q-tables, rendering each step in the visualiser.
    vis = Visualiser(env, 80)
    numActions = env.n_actions
    playerA = QLearn(actions=list(range(numActions)))
    playerA.load_Qtable("saved_players/QR")
    playerB = QLearn(actions=list(range(numActions)))
    playerB.load_Qtable("saved_players/QR_base")
    for episode in range(500):
        observation = env.reset()
        vis.update_canvas(env)
        while True:
            # Both players act on the same observation; the environment
            # advances one step and reports whether the game is over.
            actionA = playerA.choose_action(str(observation))
            actionB = playerB.choose_action(str(observation))
            observation_, reward, done = env.step(actionA, actionB)
            observation = observation_
            vis.update_canvas(env)
            if done:
                vis.reset()
                break
    print("Games won: " + str(env.win_count))
    vis.destroy()
def run_optimalB():
    # Replay games between a pre-trained Q-learning agent (player A) and a
    # pre-trained minimax-Q agent (player B). Exploration is disabled
    # (expl=0.00) so both agents play their learned policies.
    drawProbability = 0.1
    decay = 10**(-2. / N_EPISODES * 0.05)
    vis = Visualiser(env, 80)
    numActions = env.n_actions
    playerB = MinimaxQPlayer(numActions, numActions, decay=decay,
                             expl=0.00, gamma=1 - drawProbability)
    # NOTE: this table is loaded from the working directory, unlike the
    # other tables, which live under saved_players/.
    playerB.load_Qtable("MR")
    playerA = QLearn(actions=list(range(numActions)))
    playerA.load_Qtable("saved_players/MR_base")
    for episode in range(20):
        observation = env.reset()
        vis.update_canvas(env)
        while True:
            actionA = playerA.choose_action(str(observation))
            actionB = playerB.choose_action(str(observation))
            observation_, reward, done = env.step(actionA, actionB)
            observation = observation_
            vis.update_canvas(env)
            if done:
                vis.reset()
                break
    print("Games won: " + str(env.win_count))
    vis.destroy()
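

# Hypothetical entry point, not part of the original file: a minimal sketch of
# how these replay functions might be invoked. It assumes that `env`,
# `Visualiser`, `QLearn`, `MinimaxQPlayer`, and `N_EPISODES` are defined at
# module level elsewhere in this file, since both functions above rely on them,
# and that the saved Q-table files exist at the paths they load from.
if __name__ == "__main__":
    run_optimal()    # 500 games: Q-learning agent vs. Q-learning baseline
    run_optimalB()   # 20 games: Q-learning agent vs. minimax-Q agent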