Example no. 1
def run_optimal():
	"""Play 500 evaluation games between two pre-trained Q-learning players."""
	vis = Visualiser(env, 80)
	numActions = env.n_actions
	# Player A: Q-learning agent loaded from a saved Q-table.
	playerA = QLearn(actions=list(range(numActions)))
	playerA.load_Qtable("saved_players/QR")
	# Player B: baseline Q-learning agent loaded from a saved Q-table.
	playerB = QLearn(actions=list(range(numActions)))
	playerB.load_Qtable("saved_players/QR_base")
	for episode in range(500):
		observation = env.reset()
		vis.update_canvas(env)
		while True:
			# Both players choose an action for the current (stringified) state.
			actionA = playerA.choose_action(str(observation))
			actionB = playerB.choose_action(str(observation))
			observation_, reward, done = env.step(actionA, actionB)
			observation = observation_
			vis.update_canvas(env)
			if done:
				vis.reset()
				break
	print("Games won: " + str(env.win_count))
	vis.destroy()
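
Both examples rely on a game environment (env), a Visualiser, and player classes defined elsewhere in the project. As a rough guide to what the snippets expect, the stub below sketches the interfaces they call; the names EnvStub and PlayerStub, the stub bodies, and the placeholder values are assumptions for illustration only, not the project's actual implementation.

# Minimal interface sketch assumed by run_optimal() and run_optimalB().
# Only the method names and call signatures mirror the calls above; the
# bodies and values here are placeholders.
class EnvStub:
	n_actions = 5   # number of discrete actions (placeholder value)
	win_count = 0   # games won, printed at the end of each run

	def reset(self):
		# Start a new episode and return the initial observation.
		return (0, 0)

	def step(self, actionA, actionB):
		# Apply both players' actions; return (next_observation, reward, done).
		return (0, 0), 0.0, True

class PlayerStub:
	def load_Qtable(self, path):
		# Load a previously saved Q-table from disk.
		pass

	def choose_action(self, state):
		# Return an action for the given (stringified) state.
		return 0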
Example no. 2
def run_optimalB():
	"""Play 20 evaluation games: a Q-learning baseline (A) against a Minimax-Q player (B)."""
	numActions = env.n_actions
	drawProbability = 0.1
	# Decay rate passed to the Minimax-Q player; N_EPISODES is a module-level constant.
	decay = 10**(-2. / N_EPISODES * 0.05)
	vis = Visualiser(env, 80)
	# Player B: Minimax-Q agent loaded from a saved table, with exploration set to 0.
	playerB = MinimaxQPlayer(numActions, numActions, decay=decay, expl=0.00, gamma=1 - drawProbability)
	playerB.load_Qtable("MR")
	# Player A: baseline Q-learning agent loaded from a saved Q-table.
	playerA = QLearn(actions=list(range(numActions)))
	playerA.load_Qtable("saved_players/MR_base")
	for episode in range(20):
		observation = env.reset()
		vis.update_canvas(env)
		while True:
			actionA = playerA.choose_action(str(observation))
			actionB = playerB.choose_action(str(observation))
			observation_, reward, done = env.step(actionA, actionB)
			observation = observation_
			vis.update_canvas(env)
			if done:
				vis.reset()
				break
	print("Games won: " + str(env.win_count))
	vis.destroy()
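
If both routines live in the same script, a small entry point along the following lines would run them in sequence. This is only a sketch, assuming env, N_EPISODES, Visualiser, QLearn, and MinimaxQPlayer are already defined at module level as the snippets above imply.

# Hypothetical entry point; not part of the original examples.
if __name__ == "__main__":
	run_optimal()    # Example no. 1: Q-learning vs. Q-learning
	run_optimalB()   # Example no. 2: Q-learning vs. Minimax-Q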