Example #1
0
from capstone.game.games import Connect4
from capstone.game.players import RandPlayer
from capstone.game.utils import play_series


class MyPlayer(object):
    """Trivial player that always picks the first legal move."""

    def choose_move(self, game):
        """Return the first entry of ``game.legal_moves()``.

        ``game`` only needs to expose a ``legal_moves()`` method returning
        an indexable sequence; an empty sequence raises ``IndexError``.
        """
        moves = game.legal_moves()
        return moves[0]


# Run a 1000-match series on a default-constructed Connect4 game between
# MyPlayer (listed first) and a RandPlayer.
game = Connect4()
my = MyPlayer()
players = [my, RandPlayer()]
play_series(game, players, n_matches=1000)
Example #2
0
# Problem Set J (Challenger)
# Problem 273, Page 79
# Red wins with E2
#
# Each string is one board row (labelled 6 down to 1); each character is one
# cell in columns A..G. The strings are expanded into lists of single
# characters, giving a 6 x 7 list of lists.
#    ABCDEFG
challenger = list(map(list, (
    '       ',  # 6
    '       ',  # 5
    '     O ',  # 4
    '     X ',  # 3
    '     O ',  # 2
    '  OXOXX',  # 1
)))

# c42pdf(filename, c4.board)

# Wrap the challenger position: game -> MDP -> environment.
game = Connect4(challenger)
mdp = GameMDP(game)
env = Environment(mdp)

# Q-function approximator: 42 inputs, one hidden layer of 100 units,
# 7 outputs.
# NOTE(review): 42 and 7 presumably match the 6 x 7 board cells and the
# seven columns — confirm against QNetwork / move_mapper.
qnetwork = QNetwork(
    move_mapper,
    n_input_units=42,
    n_hidden_layers=1,
    n_output_units=7,
    n_hidden_units=100,
    learning_rate=0.01,
)

# Epsilon-greedy policy over the environment's actions, backed by the
# network above, with epsilon fixed at 1.0 and self-play enabled.
egreedy = EGreedy(
    provider=env.actions,
    qfunction=qnetwork,
    epsilon=1.0,
    selfplay=True,
)
qlearning = ApproximateQLearning(env=env,
                                 qfunction=qnetwork,
                                 policy=egreedy,
from capstone.game.players import AlphaBeta
from capstone.game.utils import c42pdf
from capstone.rl import FixedGameMDP, Environment
from capstone.rl.learners import QLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import QValuesPlotter
from capstone.rl.value_functions import TabularQ

# Shared random seed for the Q-function and the policy created below.
seed = 23

# Each string is one board row, one character per cell; the rows are expanded
# into lists of single characters, giving a 6 x 7 list of lists.
board = list(map(list, (
    'XOO O  ',
    'XOX X  ',
    'OXOXOXO',
    'OXOXOXO',
    'XOXOXOX',
    'XOXOXOX',
)))
# Build the environment for this fixed position.
game = Connect4(board)
# NOTE(review): the trailing 1 presumably selects which side AlphaBeta
# plays — confirm against FixedGameMDP.
mdp = FixedGameMDP(game, AlphaBeta(), 1)
env = Environment(mdp)

# Tabular Q-learning with a random policy; both are seeded with `seed`.
qlearning = QLearning(
    env=env,
    qfunction=TabularQ(random_state=seed),
    policy=RandomPolicy(env.actions, random_state=seed),
    learning_rate=0.1,
    discount_factor=1.0,
    n_episodes=1000,
)

# Train with a QValuesPlotter callback registered for the starting state
# and its legal moves.
qlearning.train(
    callbacks=[
        QValuesPlotter(
            state=game,
            actions=game.legal_moves(),
            filepath='figures/c4_ql_tab_qvalues.pdf',
        ),
    ],
)

####################
Example #4
0
from capstone.game.games import Connect4
from capstone.game.players import RandPlayer
from capstone.game.utils import play_match

# The board is passed as one string: six adjacent 7-character literals, one
# per row, which Python joins into a single 42-character argument.
game = Connect4(
    'XO-----'
    'XO-----'
    'OXOXOXO'
    'OXOXOXO'
    'XOXOXOX'
    'XOXOXOX'
)
# Play a single match between two RandPlayer instances from that position.
players = [RandPlayer(), RandPlayer()]
play_match(game, players)