Example 1

A previously trained Keras player (loaded from models/qltic.h5) is evaluated against a random player over a series of 1,000 Tic-Tac-Toe matches:
from capstone.game.games import TicTacToe
from capstone.game.players import KerasPlayer, RandPlayer
from capstone.game.utils import play_series

# Pit the saved Keras player against a purely random player.
players = [KerasPlayer('models/qltic.h5'), RandPlayer()]
game = TicTacToe()
play_series(game, players, n_matches=1000)
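For intuition, play_series simply runs many independent games between the two players and tallies the outcomes. The sketch below shows what such an evaluation loop looks like in general; the make_game/is_over/choose_move/make_move/winner interfaces are hypothetical stand-ins used for illustration, not the actual capstone API.

def evaluate(make_game, players, n_matches=1000):
    """Play n_matches games and tally wins per player index plus draws.

    The game/player methods used here (is_over, choose_move, make_move,
    winner) are hypothetical stand-ins, not the capstone.game API.
    """
    tally = {0: 0, 1: 0, 'draw': 0}
    for _ in range(n_matches):
        game = make_game()          # fresh game for every match
        turn = 0                    # index of the player to move
        while not game.is_over():
            move = players[turn].choose_move(game)
            game.make_move(move)
            turn = 1 - turn
        winner = game.winner()      # assumed to return 0, 1, or None on a draw
        tally['draw' if winner is None else winner] += 1
    return tally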
Example 2

'''
Q-Learning is used to estimate the state-action values of a fixed
Tic-Tac-Toe board position when playing against a fixed Alpha-Beta
opponent.
'''
from capstone.game.games import TicTacToe
from capstone.game.players import AlphaBeta, RandPlayer
from capstone.game.utils import tic2pdf
from capstone.rl import FixedGameMDP, Environment
from capstone.rl.learners import QLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import QValuesPlotter
from capstone.rl.value_functions import TabularVF

seed = 23

# Fixed, partially played board used as the start state for learning.
board = [[' ', ' ', 'X'], [' ', 'X', ' '], ['O', 'O', ' ']]
game = TicTacToe(board)

# Wrap the game as an MDP with the Alpha-Beta opponent fixed into the
# environment dynamics (here playing as the second player).
mdp = FixedGameMDP(game, AlphaBeta(), 1)
env = Environment(mdp)

# Tabular Q-learning driven by a purely random exploration policy.
qlearning = QLearning(env=env,
                      qfunction=TabularVF(random_state=seed),
                      policy=RandomPolicy(action_space=env.action_space,
                                          random_state=seed),
                      learning_rate=0.1,
                      discount_factor=1.0)
# Train for 800 episodes, tracking the Q-values of the legal moves from the
# start position and saving the resulting plot to a PDF.
qlearning.train(n_episodes=800,
                callbacks=[
                    QValuesPlotter(state=game,
                                   actions=game.legal_moves(),
                                   filepath='figures/tic_ql_tab_qvalues.pdf')
                ])
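The learning_rate and discount_factor arguments correspond to alpha and gamma in the standard tabular Q-learning update, Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)). The sketch below illustrates what one training episode does; it is an illustrative re-implementation, not the QLearning class from capstone.rl, and the env.reset/actions/step interface it assumes is a hypothetical stand-in.

import random
from collections import defaultdict

def q_learning(env, n_episodes=800, alpha=0.1, gamma=1.0):
    """Illustrative tabular Q-learning loop (not the capstone.rl implementation).

    `env` is assumed to expose reset() -> state, actions(state) -> list of
    actions, and step(action) -> (next_state, reward, done); these are
    hypothetical stand-ins for the capstone.rl Environment API.
    """
    Q = defaultdict(float)  # Q[(state, action)] -> estimated action value
    for _ in range(n_episodes):
        state = env.reset()
        done = False
        while not done:
            # Behaviour policy: purely random, mirroring RandomPolicy above.
            action = random.choice(env.actions(state))
            next_state, reward, done = env.step(action)
            # Greedy value of the successor state (zero at terminal states).
            next_value = 0.0 if done else max(
                Q[(next_state, a)] for a in env.actions(next_state))
            # Move Q(s, a) toward the bootstrapped target r + gamma * max_a' Q(s', a').
            Q[(state, action)] += alpha * (reward + gamma * next_value - Q[(state, action)])
            state = next_state
    return Q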