def test_tic_tac_toe(self):
    game_spec = TicTacToeGameSpec()
    create_model_func = functools.partial(create_network, game_spec.board_squares(), (100, 100, 100))
    variables, win_rate = train_policy_gradients(game_spec, create_model_func, None,
                                                 learn_rate=1e-4,
                                                 number_of_games=60000,
                                                 print_results_every=1000,
                                                 batch_size=100,
                                                 randomize_first_player=False)
    self.assertGreater(win_rate, 0.4)
"""
import functools
import random

from common.network_helpers import create_network
from games.tic_tac_toe import TicTacToeGameSpec
from techniques.train_policy_gradient import train_policy_gradients

BATCH_SIZE = 100  # how many games to play between each parameter update
LEARN_RATE = 1e-4
PRINT_RESULTS_EVERY_X = 100  # how many games to play between printing results
NETWORK_FILE_PATH = 'current_network.p'  # path to save the network to
NUMBER_OF_GAMES_TO_RUN = 1000

# To play a different game, change this to another spec, e.g. TicTacToeXGameSpec or ConnectXGameSpec;
# getting those running well may take some hyperparameter tuning (a hedged sketch follows this example's code).
game_spec = TicTacToeGameSpec()

create_network_func = functools.partial(create_network,
                                        game_spec.board_squares(),
                                        (100, 100, 100))

train_policy_gradients(game_spec,
                       create_network_func,
                       NETWORK_FILE_PATH,
                       number_of_games=NUMBER_OF_GAMES_TO_RUN,
                       batch_size=BATCH_SIZE,
                       learn_rate=LEARN_RATE,
                       print_results_every=PRINT_RESULTS_EVERY_X)


def second_player_move(board_state, side):
    # The body was cut off in this listing; as a stand-in (a guess, not the
    # original), play a uniformly random legal move for the second player.
    return random.choice(list(game_spec.available_moves(board_state)))
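
Per the comment in the script, switching games is a one-line change to game_spec. A minimal sketch, assuming the variant lives in games.tic_tac_toe_x and that TicTacToeXGameSpec(board_size, winning_length) is its signature (both are assumptions; check the actual module):

# Hedged sketch: train on a 5x5 board needing 4 in a row.
# ASSUMPTION: games.tic_tac_toe_x.TicTacToeXGameSpec(board_size, winning_length)
from games.tic_tac_toe_x import TicTacToeXGameSpec

game_spec = TicTacToeXGameSpec(5, 4)

# board_squares() grows with the board, so the same partial resizes the input layer
create_network_func = functools.partial(create_network,
                                        game_spec.board_squares(),
                                        (100, 100, 100))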
Example #3
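Only the tail of this snippet survived the listing. Everything down to the first_unvisited_node = False line below is a reconstructed head, a best guess rather than the original source: imports, the function signature, and a rollout loop that picks moves by UCB1 once every child of the current state has been visited, and at random before that.

import collections
import math
import random


def monte_carlo_tree_search_uct(game_spec, board_state, side, number_of_samples):
    # Reconstructed head (best guess): run rollouts, keeping per-state
    # visit counts and accumulated results.
    state_results = collections.defaultdict(float)
    state_samples = collections.defaultdict(float)

    for _ in range(number_of_samples):
        current_board_state = board_state
        current_side = side
        first_unvisited_node = True
        rollout_path = []
        result = 0

        while result == 0:
            children = {move: game_spec.apply_move(current_board_state, move, current_side)
                        for move in game_spec.available_moves(current_board_state)}
            if not children:
                break  # board full with no winner: a draw, result stays 0

            if all(state in state_samples for state in children.values()):
                # every child already visited at least once: pick the move maximising UCB1
                log_total = math.log(sum(state_samples[s] for s in children.values()))
                move = max(children,
                           key=lambda m: state_results[children[m]] / state_samples[children[m]]
                           + math.sqrt(2. * log_total / state_samples[children[m]]))
            else:
                move = random.choice(list(children))

            current_board_state = children[move]

            if first_unvisited_node:
                rollout_path.append((current_board_state, current_side))
                if current_board_state not in state_samples: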
                    first_unvisited_node = False

            current_side = -current_side

            result = game_spec.has_winner(current_board_state)

        for path_board_state, path_side in rollout_path:
            state_samples[path_board_state] += 1.
            # score the terminal result (+1/-1, or 0 for a draw) from the
            # perspective of the side that moved into this state, normalized
            # from [-1, 1] to [0, 1]
            state_results[path_board_state] += (result * path_side) / 2. + .5

    move_states = {move: game_spec.apply_move(board_state, move, side) for move in game_spec.available_moves(board_state)}

    move = max(move_states, key=lambda x: state_results[move_states[x]] / state_samples[move_states[x]])

    return state_results[move_states[move]] / state_samples[move_states[move]], move


if __name__ == '__main__':
    from games.tic_tac_toe import TicTacToeGameSpec

    sample_board_state = ((1, 0, -1),
                          (1, 0, 0),
                          (0, -1, 0))

    print(monte_carlo_tree_search_uct(TicTacToeGameSpec(), sample_board_state, -1, 10000))
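
The UCB1 rule in the reconstructed head above trades a child's average payout against how rarely it has been tried. A tiny self-contained check with made-up visit counts shows the exploration bonus at work:

import math

# made-up statistics: child A averages 0.7 over 90 visits,
# child B averages 0.5 over only 10 visits
log_total = math.log(90 + 10)

ucb_a = 0.7 + math.sqrt(2. * log_total / 90)  # ~1.02
ucb_b = 0.5 + math.sqrt(2. * log_total / 10)  # ~1.46

# the under-sampled child scores higher, so the search revisits it
print(ucb_a, ucb_b)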
Example #4
def setUp(self):
    self._game_spec = TicTacToeGameSpec()
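
The rest of this test class is cut off in the listing. Purely as an illustration (not from the original file), here is the shape a test using this fixture could take, assuming the spec exposes new_board() alongside the board_squares() and available_moves() seen in the snippets above:

def test_new_board_has_all_moves_available(self):
    # hypothetical test, not from the original source: every square of a
    # fresh board should be offered as a legal move
    board = self._game_spec.new_board()
    moves = list(self._game_spec.available_moves(board))
    self.assertEqual(len(moves), self._game_spec.board_squares())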