# Tabular Q-learning on a fixed Connect Four position.  FixedGameMDP
# presumably lets AlphaBeta drive the opposing side -- confirm against
# its definition.
game = Connect4(board)
mdp = FixedGameMDP(game, AlphaBeta(), 1)
env = Environment(mdp)

# Name the collaborators up front so the QLearning call stays short.
qfunction = TabularQ(random_state=seed)
policy = RandomPolicy(env.actions, random_state=seed)
qlearning = QLearning(
    env=env,
    qfunction=qfunction,
    policy=policy,
    learning_rate=0.1,
    discount_factor=1.0,
    n_episodes=1000,
)

# Track the Q-values of the root position while training.
plotter = QValuesPlotter(
    state=game,
    actions=game.legal_moves(),
    filepath='figures/c4_ql_tab_qvalues.pdf',
)
qlearning.train(callbacks=[plotter])

####################
# Generate figures #
####################

c42pdf('figures/c4_ql_tab_current.pdf', game.board)

# Print each legal move with its learned Q-value and render the
# resulting position as a PDF.
for move in game.legal_moves():
    print('*' * 80)
    qvalue = qlearning.qfunction[(game, move)]
    print('Move: %s' % move)
    print('Value: %f' % qvalue)
    successor = game.copy().make_move(move)
    print(successor)
    filename = 'figures/c4_ql_tab_move_{}.pdf'.format(move)
    c42pdf(filename, successor.board)
from capstone.game.games import Connect4 as C4
from capstone.game.utils import c42pdf


def series_to_game(series):
    '''Build a Connect 4 game from one Pandas Series of the UCI dataset.

    The last element of *series* is the outcome label; the remaining
    elements are board cells stored column-major, bottom row first.
    Returns a ``(C4, outcome)`` pair.
    '''
    # NOTE(review): blanks map to '-' here while other boards in this
    # project use ' ' for empty cells -- confirm C4 accepts both.
    symbols = {'x': 'X', 'o': 'O', 'b': '-'}
    board = [[' '] * C4.COLS for _ in range(C4.ROWS)]
    cells, outcome = series.iloc[:-1], series.iloc[-1]
    for idx, cell in enumerate(cells):
        # Cells are listed column by column, from the bottom of each
        # column upward; board row 0 is the top.
        col, offset = divmod(idx, C4.ROWS)
        board[C4.ROWS - offset - 1][col] = symbols[cell]
    return C4(board), outcome


# Load UCI Connect 4 dataset
df = load_uci_c4()

# Dataset rows chosen for visual inspection.
selected = [1, 5, 10, 20, 567]

# Render each selected board position to its own PDF; the outcome label
# is embedded in the filename.
for i in selected:
    c4, outcome = series_to_game(df.iloc[i])
    filename = 'figures/c4_exploration_{i}_{outcome}.pdf'.format(
        i=i, outcome=outcome)
    c42pdf(filename, c4.board)
    # NOTE(review): this fragment is truncated -- the opening of the call
    # these keyword arguments belong to (presumably something like
    # ``qlearning = QLearning(env=env, qfunction=..., ...``) is missing,
    # so the lines below are orphaned and this chunk is not runnable as-is.
    policy=RandomPolicy(env.actions, random_state=seed),
    learning_rate=0.1,
    discount_factor=1.0,
    n_episodes=4000,
)
# Train while a callback records Q-values for the tracked state.
qlearning.train(
    callbacks=[
        QValuesPlotter(
            state=game,
            actions=game.legal_moves(),
            filepath='figures/c4_ql_tab_simple_selfplay_progress.pdf'
        )
    ]
)

####################
# Generate figures #
####################

# Current position of the tracked game.
c42pdf('figures/c4_ql_tab_simple_selfplay_cur.pdf', game.board)

# Print each legal move with its learned Q-value and render the
# resulting position as a PDF.
for move in game.legal_moves():
    print('*' * 80)
    qvalue = qlearning.qfunction[(game, move)]
    print('Move: {}'.format(move))
    print('Value: %f' % qvalue)
    successor = game.copy().make_move(move)
    print(successor)
    filename = 'figures/c4_ql_tab_simple_selfplay_move_{}.pdf'.format(move)
    c42pdf(filename, successor.board)
Example #4
0
'''
from capstone.game.games import Connect4
from capstone.game.utils import c42pdf
from capstone.rl import GameMDP, Environment
from capstone.rl.learners import QLearningSelfPlay

# Late-game Connect Four position, rows listed top to bottom.
board = [['X', 'O', 'O', ' ', 'O', ' ', ' '],
         ['X', 'O', 'X', ' ', 'X', ' ', ' '],
         ['O', 'X', 'O', 'X', 'O', 'X', 'O'],
         ['O', 'X', 'O', 'X', 'O', 'X', 'O'],
         ['X', 'O', 'X', 'O', 'X', 'O', 'X'],
         ['X', 'O', 'X', 'O', 'X', 'O', 'X']]
game = Connect4(board)
mdp = GameMDP(game)
env = Environment(mdp)

# Self-play variant: presumably the learner controls both sides, so no
# fixed opponent is configured -- confirm against QLearningSelfPlay.
qlearning = QLearningSelfPlay(env, n_episodes=1000, random_state=0)
qlearning.train()

c42pdf('figures/c4_ql_tabular_selfplay_current.pdf', game.board)
print(game)

# Report each legal move with its learned Q-value and save the
# resulting position; the value is embedded in the filename.
for move in game.legal_moves():
    print('*' * 80)
    qvalue = qlearning.qf[(game, move)]
    print('Move: %s' % move)
    print('Value: %f' % qvalue)
    successor = game.copy().make_move(move)
    print(successor)
    filename = 'figures/c4_ql_tabular_selfplay_move_%s_value_%.4f.pdf' % (
        move, qvalue)
    c42pdf(filename, successor.board)
Example #5
0
# The Complete Book of Connect 4
# Problem Set A (Easy)
# Problem 1, Page 16
# Red wins with C4
#         A    B    C    D    E    F    G
# Puzzle board (rows top to bottom; row/column labels in the comments
# match the column header above).
board = [
    [' ', ' ', ' ', ' ', ' ', ' ', ' '],  # 6
    [' ', ' ', ' ', ' ', ' ', ' ', ' '],  # 5
    [' ', ' ', ' ', 'O', ' ', 'O', ' '],  # 4
    [' ', ' ', 'O', 'X', ' ', 'X', ' '],  # 3
    [' ', ' ', 'X', 'O', ' ', 'X', ' '],  # 2
    [' ', 'O', 'O', 'X', 'X', 'X', 'O']
]  # 1

# Render the puzzle position before any learning happens.
c42pdf('figures/c4_dqn_easy_board.pdf', board)
game = Connect4(board)
mdp = GameMDP(game)
env = Environment(mdp)
# Q-value approximator: 42 inputs match the 6x7 board cells and 7
# outputs match the columns; move_mapper presumably translates moves
# to output indices -- confirm against QNetwork's definition.
qnetwork = QNetwork(move_mapper,
                    n_input_units=42,
                    n_hidden_layers=2,
                    n_output_units=7,
                    n_hidden_units=100,
                    learning_rate=0.01)
# Epsilon-greedy exploration; epsilon=1.0 means purely random action
# choice (NOTE(review): no epsilon decay is visible in this chunk --
# confirm it is scheduled elsewhere).
egreedy = EGreedy(provider=env.actions,
                  qfunction=qnetwork,
                  epsilon=1.0,
                  selfplay=True,
                  random_state=seed)
qlearning = ApproximateQLearning(env=env,