Example #1
import unittest

import numpy as np
import torch

from alphazero.games.tictactoe import TicTacToeGame


class TicTacToeGameTest(unittest.TestCase):  # test class name assumed

    def test_symmetry(self):
        game = TicTacToeGame(3)
        # pylint: disable=not-callable
        # Each plane is 10x the previous one, and the policy is plane 0
        # flattened and scaled by 11, so the assertions below hold only if
        # every symmetry transforms all of them identically.
        state = torch.tensor([
            [[1, 2, 3],
             [4, 5, 6],
             [7, 8, 9]],
            [[10, 20, 30],
             [40, 50, 60],
             [70, 80, 90]],
            [[100, 200, 300],
             [400, 500, 600],
             [700, 800, 900]],
        ])
        policy = torch.tensor([11, 22, 33,
                               44, 55, 66,
                               77, 88, 99])
        symmetries = game.symmetries(state, policy)
        for s, p in symmetries:
            # the policy must be permuted exactly like the first plane
            self.assertTrue(np.array_equal(s[0].flatten().numpy() * 11, p))
            # and all planes must be transformed the same way
            self.assertTrue(torch.equal(s[0] * 10, s[1]))
            self.assertTrue(torch.equal(s[1] * 10, s[2]))
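
This test only passes if symmetries applies one and the same dihedral transform to every state plane and to the flattened policy. A minimal sketch of such a generator, assuming a (planes, n, n) state layout; the name board_symmetries is ours, not this project's actual implementation:

import torch

def board_symmetries(state: torch.Tensor, policy: torch.Tensor):
    # Yield the 8 dihedral transforms (4 rotations x optional mirror)
    # of a (planes, n, n) state stack, together with the identically
    # transformed flat policy.
    n = state.size(-1)
    policy_board = policy.view(n, n)
    for k in range(4):
        for flip in (False, True):
            s = torch.rot90(state, k, dims=(-2, -1))
            p = torch.rot90(policy_board, k, dims=(-2, -1))
            if flip:
                s = torch.flip(s, dims=(-1,))
                p = torch.flip(p, dims=(-1,))
            yield s, p.reshape(-1)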
Example #2
import unittest

from alphazero.games.tictactoe import TicTacToeGame, TicTacToeMove


class TicTacToeGameTest(unittest.TestCase):  # test class name assumed
    def test_canonical(self):
        game = TicTacToeGame(3)
        # empty board: its canonical form is the board itself
        self.assertEqual(str(game.state.canonical().board),
                         str(game.state.board))
        game.play(TicTacToeMove(0, 0))
        # O to move now: the canonical view differs from the raw board
        self.assertNotEqual(str(game.state.canonical().board),
                            str(game.state.board))
        game.play(TicTacToeMove(2, 2))
        # X to move again: the two coincide once more
        self.assertEqual(str(game.state.canonical().board),
                         str(game.state.board))
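
The expectations above fit an implementation where canonical() re-expresses the board from the player-to-move's perspective. A common way to get exactly this behaviour, sketched under the assumption of a +1 (X) / -1 (O) / 0 (empty) cell encoding, which this project may not use:

import numpy as np

def canonical_board(board: np.ndarray, player_to_move: int) -> np.ndarray:
    # Flipping signs when O (-1) is to move makes the current player
    # always appear as +1: an empty board is then its own canonical form,
    # the board after X's first move is not, and after O replies it is again.
    return board * player_to_move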
Example #3
import logging

import torch

from alphazero.games.tictactoe import TicTacToeGame
from alphazero.util.logging_config import setup_logger
from alphazero.util.pit_agents import pit

# Exact module paths for the next two imports are assumed, not confirmed
# by this snippet:
from alphazero.alphazero.state_encoders import TicTacToeStateEncoder
from alphazero.agents import RandomPlayAgent

MODELS = [
    'dualres_comp',
    'dualres_nocomp',
    'res_comp',
    'res_nocomp',
]
NUM_GAMES = 100

setup_logger('experiment_logs', 'ttt_comp_random.log')
logger = logging.getLogger(__name__)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logger.info('Training on %s', device)
game = TicTacToeGame()
state_encoder = TicTacToeStateEncoder(device)

config = {
    'num_simulations': 50,
    'c_puct': 1.,
    'game_size': 3,
    'num_history': 1,
    'num_resnet_filters': 256,
    'value_head_hidden_dim': 256,
    'device': device
}

random_agent = RandomPlayAgent()
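
The snippet stops after constructing random_agent; the comparison itself would load each checkpoint and pit it against random play. A sketch of that driver loop, with dual_resnet and AlphaZeroArgMaxAgent borrowed from the next example and a hypothetical pit(agent_a, agent_b, game, num_games) -> (wins, losses, draws) signature:

import os

for name in MODELS:
    # checkpoint names follow the 'ttt_dualres_comp.pth' pattern used below;
    # in practice the architecture would also vary with the model name
    net = dual_resnet(game, config)
    net.load_state_dict(
        torch.load(os.path.join('pretrained', f'ttt_{name}.pth'),
                   map_location=device))
    net.eval()
    agent = AlphaZeroArgMaxAgent(game, state_encoder, net, config)
    wins, losses, draws = pit(agent, random_agent, game, NUM_GAMES)
    logger.info('%s vs random: %d wins / %d losses / %d draws',
                name, wins, losses, draws)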

Example #4
import logging
import os
import random

import torch
import yaml

from alphazero.games.tictactoe import (TicTacToeGame, TicTacToeMove,
                                       TicTacToePlayer)

# Exact module paths for the following imports are assumed, not confirmed
# by this snippet:
from alphazero.alphazero.mcts import MonteCarloTreeSearch
from alphazero.alphazero.nets import dual_resnet
from alphazero.alphazero.state_encoders import TicTacToeStateEncoder
from alphazero.agents import AlphaZeroArgMaxAgent

# Only the datefmt argument survived extraction here; the surrounding
# logging.basicConfig call is reconstructed.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(message)s',
                    datefmt='%m/%d/%Y %I:%M:%S %p')

with open('tictactoe.yaml', 'r') as f:
    config = yaml.safe_load(f)

config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'


def read_move(player: TicTacToePlayer) -> TicTacToeMove:
    """Read an `x y` coordinate pair for `player` from stdin."""
    x, y = input(f"{player.name} move: ").split()
    return TicTacToeMove(int(x), int(y))
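
read_move raises on malformed input; a slightly hardened variant (hypothetical, identical on well-formed input) simply reprompts:

def read_move_safe(player: TicTacToePlayer) -> TicTacToeMove:
    # Retry until the line parses as two integers; whether the square is
    # actually legal is still left to the game.
    while True:
        try:
            x, y = map(int, input(f"{player.name} move: ").split())
            return TicTacToeMove(x, y)
        except ValueError:
            print("enter two integers, e.g. 0 2")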


if __name__ == '__main__':
    game = TicTacToeGame(config['game_size'])
    state_encoder = TicTacToeStateEncoder(config['device'])

    net = dual_resnet(game, config)
    mcts = MonteCarloTreeSearch(game=game,
                                state_encoder=state_encoder,
                                nn=net,
                                config=config)

    net.load_state_dict(
        torch.load(os.path.join('pretrained', 'ttt_dualres_comp.pth')))
    # net.load_state_dict(torch.load(os.path.join(config['log_dir'], 'best.pth')))
    net.eval()
    agent = AlphaZeroArgMaxAgent(game, state_encoder, net, config)

    agent_role = random.choice([TicTacToePlayer.X, TicTacToePlayer.O])
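
The example ends right after drawing the agent's side at random; the interactive loop that would follow is not shown. A sketch of how it might continue, where is_over, current_player, and select_move are assumed names and only game.play is confirmed by the snippets above:

    # Hypothetical continuation of the __main__ block.
    while not game.is_over:
        if game.current_player == agent_role:
            game.play(agent.select_move(game.state))   # agent's turn
        else:
            game.play(read_move(game.current_player))  # human's turn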
Example #5
import logging

import torch
import yaml
from torchsummary import summary

from alphazero.alphazero.trainer import AlphaZeroTrainer
from alphazero.games.tictactoe import TicTacToeGame
from alphazero.util.logging_config import setup_logger

# Exact module paths for the following imports are assumed, not confirmed
# by this snippet:
from alphazero.alphazero import nets
from alphazero.alphazero.mcts import MonteCarloTreeSearch
from alphazero.alphazero.state_encoders import TicTacToeStateEncoder

with open('tictactoe.yaml', 'r') as f:
    config = yaml.safe_load(f)

config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

setup_logger(config['log_dir'], 'train.log')
logger = logging.getLogger(__name__)
logger.info('** Training on %s **', config['device'])
logger.info(config)

if __name__ == '__main__':
    game = TicTacToeGame(config['game_size'])
    state_encoder = TicTacToeStateEncoder(config['device'])

    net = getattr(nets, config['nn_arch'])(game, config)

    summary(net,
            input_size=(config['num_history'], config['game_size'],
                        config['game_size']),
            batch_size=config['batch_size'])

    mcts = MonteCarloTreeSearch(game=game,
                                state_encoder=state_encoder,
                                nn=net,
                                config=config)
    trainer = AlphaZeroTrainer(game=game,
                               state_encoder=state_encoder,