Example 1
def ql_vs_minmax(visualise):
	print("ql vs minmax ql")
	numActions = env.n_actions
	drawProbability = 0.1
	decay = 10**(-2. / N_EPISODES * 0.05)
	if(visualise):
		vis = Visualiser(env, 80)
	start_time = time.time()
	ql_wins = 0
	minmax_wins = 0
	# create both agents and load their pre-trained Q-tables
	playerA = QLearn(actions=list(range(numActions)), reward_decay=0.7)
	playerB = MinimaxQPlayer(numActions, numActions, decay=decay, expl=0.01, gamma=1-drawProbability)
	playerA.load_Qtable('saved_players/QR')
	playerB.load_Qtable("MR")
	# evaluation run: play a fixed number of games with the loaded policies
	iterations = 5000
	for episode in range(iterations):
		# initial observation
		observation = env.reset()
		# print(str(episode))
		if(episode % 100 == 0):
			print(str(float(episode) / iterations * 100) + "%")
		# if(episode > iterations - 100):
		# 	vis.update_canvas(env)
		while True:
			# each agent chooses its action based on the current observation
			actionA = playerA.choose_action(str(observation))
			actionB = playerB.choose_action(str(observation))

			# step the environment with both actions and track which agent scored
			observation_, reward, done = env.step(actionA, actionB)
			if reward == 1:
				ql_wins += 1
			elif reward == -1:
				minmax_wins += 1
			
			observation = observation_
			if(visualise):
				vis.update_canvas(env)
			if done:
				if(visualise):
					vis.reset()
				break
	return (ql_wins, minmax_wins)
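The examples in this section rely on module-level objects (env, N_EPISODES, time, Visualiser, QLearn, MinimaxQPlayer) that are defined elsewhere in the project. A minimal setup sketch, assuming hypothetical module names and an environment constructor that are not part of the original code, could look like this:

# Hypothetical setup sketch: the real module names and constructor differ per project.
import time

from environment import SoccerEnv      # assumed env class exposing n_actions, reset(), step(), win_count
from visualiser import Visualiser      # rendering helper used by the examples
from qlearn import QLearn              # tabular Q-learning agent with choose_action() and load_Qtable()
from minimax_q import MinimaxQPlayer   # Minimax-Q agent with choose_action() and load_Qtable()

N_EPISODES = 100000                    # placeholder training length used by the decay schedule
env = SoccerEnv()                      # shared environment instance used by all examples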
Example 2
def run_optimal():
	# replay two pre-trained Q-learning agents against each other with visualisation
	vis = Visualiser(env, 80)
	numActions = env.n_actions
	playerA = QLearn(actions=list(range(numActions)))
	playerA.load_Qtable("saved_players/QR")
	playerB = QLearn(actions=list(range(numActions)))
	playerB.load_Qtable("saved_players/QR_base")
	for episode in range(500):
		observation = env.reset()
		vis.update_canvas(env)
		while(True):
			actionA = playerA.choose_action(str(observation))
			actionB = playerB.choose_action(str(observation))
			observation_, reward, done = env.step(actionA, actionB)
			observation = observation_
			vis.update_canvas(env)
			if done:
				vis.reset()
				break
	print("Games won: " + str(env.win_count))
	vis.destroy()
Example 3
def run_optimalB():
	# replay a pre-trained Q-learning agent against a pre-trained Minimax-Q agent
	numActions = env.n_actions
	drawProbability = 0.1
	decay = 10**(-2. / N_EPISODES * 0.05)
	vis = Visualiser(env, 80)
	playerB = MinimaxQPlayer(numActions, numActions, decay=decay, expl=0.00, gamma=1-drawProbability)
	playerB.load_Qtable("MR")
	playerA = QLearn(actions=list(range(numActions)))
	playerA.load_Qtable("saved_players/MR_base")
	for episode in range(20):
		observation = env.reset()
		vis.update_canvas(env)
		while(True):
			actionA = playerA.choose_action(str(observation))
			actionB = playerB.choose_action(str(observation))
			observation_, reward, done = env.step(actionA, actionB)
			observation = observation_
			vis.update_canvas(env)
			if done:
				vis.reset()
				break
	print("Games won: " + str(env.win_count))
	vis.destroy()
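A possible entry point that runs the three examples in sequence, assuming the setup sketched after Example 1, might look like the following (a sketch, not part of the original code):

if __name__ == "__main__":
	# evaluate the trained agents head-to-head, then replay the stored policies
	ql_wins, minmax_wins = ql_vs_minmax(visualise=False)
	print("QL wins: " + str(ql_wins) + ", Minimax-Q wins: " + str(minmax_wins))
	run_optimal()
	run_optimalB()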