def test_symmetry(self):
    game = TicTacToeGame(3)
    # pylint: disable=not-callable
    state = torch.tensor([
        [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
        ],
        [
            [10, 20, 30],
            [40, 50, 60],
            [70, 80, 90],
        ],
        [
            [100, 200, 300],
            [400, 500, 600],
            [700, 800, 900],
        ],
    ])
    policy = torch.tensor([
        11, 22, 33,
        44, 55, 66,
        77, 88, 99,
    ])
    symmetries = game.symmetries(state, policy)
    for s, p in symmetries:
        # Every symmetry must transform the policy in lockstep with the
        # board planes: plane 0, flattened and scaled by 11, reproduces
        # the policy, and the history planes stay scaled copies of it.
        self.assertTrue(np.array_equal(s[0].flatten().numpy() * 11, p))
        self.assertTrue(torch.equal(s[0] * 10, s[1]))
        self.assertTrue(torch.equal(s[1] * 10, s[2]))
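# For reference, a symmetries() implementation consistent with the test above
# could look like the sketch below. This is an illustration, not the repo's
# actual method: it applies each of the eight dihedral transforms (four
# rotations, each optionally mirrored) to the state planes and to the policy
# reshaped as an n x n board, so both are always transformed in lockstep.
#
#     def symmetries(self, state, policy):
#         n = state.size(-1)
#         policy_board = policy.view(n, n)
#         result = []
#         for k in range(4):
#             rot_s = torch.rot90(state, k, dims=(-2, -1))
#             rot_p = torch.rot90(policy_board, k, dims=(-2, -1))
#             result.append((rot_s, rot_p.flatten()))
#             result.append((torch.flip(rot_s, dims=(-1,)),
#                            torch.flip(rot_p, dims=(-1,)).flatten()))
#         return result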
def test_canonical(self):
    game = TicTacToeGame(3)
    # Empty board: X is to move, so the canonical state equals the board.
    self.assertEqual(str(game.state.canonical().board),
                     str(game.state.board))
    game.play(TicTacToeMove(0, 0))
    # After X's move it is O's turn, so canonicalisation changes the board.
    self.assertNotEqual(str(game.state.canonical().board),
                        str(game.state.board))
    game.play(TicTacToeMove(2, 2))
    # X is to move again: the canonical form matches the raw board once more.
    self.assertEqual(str(game.state.canonical().board),
                     str(game.state.board))
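# The canonicalisation rule this test relies on: the canonical state is the
# board from the perspective of the player to move, so it is unchanged when
# X (the first player) is on turn and has the pieces swapped when O is.
# A hypothetical sketch (swap_pieces() and the constructor arguments are
# illustrative names, not the repo's actual API):
#
#     def canonical(self):
#         if self.player == TicTacToePlayer.X:
#             return self
#         return TicTacToeState(self.board.swap_pieces(), self.player)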
import logging

import torch

from alphazero.games.tictactoe import TicTacToeGame
from alphazero.util.logging_config import setup_logger
from alphazero.util.pit_agents import pit

MODELS = [
    'dualres_comp',
    'dualres_nocomp',
    'res_comp',
    'res_nocomp',
]
NUM_GAMES = 100

setup_logger('experiment_logs', 'ttt_comp_random.log')
logger = logging.getLogger(__name__)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
logger.info('Evaluating on %s', device)

game = TicTacToeGame()
state_encoder = TicTacToeStateEncoder(device)
config = {
    'num_simulations': 50,
    'c_puct': 1.0,
    'game_size': 3,
    'num_history': 1,
    'num_resnet_filters': 256,
    'value_head_hidden_dim': 256,
    'device': device,
}
random_agent = RandomPlayAgent()
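# The pitting loop itself is not shown in this excerpt. A plausible
# continuation, sketched under assumed signatures (load_model() is a
# hypothetical helper, and pit() is assumed to return win/draw/loss counts;
# the real alphazero.util.pit_agents.pit may differ):
#
#     for model_name in MODELS:
#         net = load_model(model_name, game, config)
#         agent = AlphaZeroArgMaxAgent(game, state_encoder, net, config)
#         wins, draws, losses = pit(game, agent, random_agent, NUM_GAMES)
#         logger.info('%s vs. random: %d wins, %d draws, %d losses',
#                     model_name, wins, draws, losses)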
import logging
import os
import random

import torch
import yaml

from alphazero.games.tictactoe import (TicTacToeGame, TicTacToeMove,
                                       TicTacToePlayer)

logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p')

with open('tictactoe.yaml', 'r') as f:
    config = yaml.safe_load(f)
config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'


def read_move(player: TicTacToePlayer) -> TicTacToeMove:
    """Prompt the human player for a move, given as 'x y' coordinates."""
    x, y = input(f"{player.name} move: ").split()
    return TicTacToeMove(int(x), int(y))


if __name__ == '__main__':
    game = TicTacToeGame(config['game_size'])
    state_encoder = TicTacToeStateEncoder(config['device'])
    net = dual_resnet(game, config)
    mcts = MonteCarloTreeSearch(game=game,
                                state_encoder=state_encoder,
                                nn=net,
                                config=config)
    # Load the pretrained dual-resnet checkpoint.
    net.load_state_dict(
        torch.load(os.path.join('pretrained', 'ttt_dualres_comp.pth')))
    # net.load_state_dict(
    #     torch.load(os.path.join(config['log_dir'], 'best.pth')))
    net.eval()
    agent = AlphaZeroArgMaxAgent(game, state_encoder, net, config)
    # Randomly assign the agent to play X or O.
    agent_role = random.choice([TicTacToePlayer.X, TicTacToePlayer.O])
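    # The interactive loop is truncated in this excerpt. A minimal sketch of
    # how it might continue (is_over, current_player, winner and select_move
    # are illustrative names; the actual classes may differ):
    #
    #     while not game.is_over:
    #         if game.current_player == agent_role:
    #             move = agent.select_move(game.state)
    #         else:
    #             move = read_move(game.current_player)
    #         game.play(move)
    #         print(game.state.board)
    #     print('Winner:', game.winner)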
import logging

import torch
import yaml
from torchsummary import summary

from alphazero.alphazero.trainer import AlphaZeroTrainer
from alphazero.games.tictactoe import TicTacToeGame
from alphazero.util.logging_config import setup_logger

with open('tictactoe.yaml', 'r') as f:
    config = yaml.safe_load(f)
config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

setup_logger(config['log_dir'], 'train.log')
logger = logging.getLogger(__name__)
logger.info('** Training on %s **', config['device'])
logger.info(config)

if __name__ == '__main__':
    game = TicTacToeGame(config['game_size'])
    state_encoder = TicTacToeStateEncoder(config['device'])
    # Build the network architecture named by the config.
    net = getattr(nets, config['nn_arch'])(game, config)
    summary(net,
            input_size=(config['num_history'],
                        config['game_size'],
                        config['game_size']),
            batch_size=config['batch_size'])
    mcts = MonteCarloTreeSearch(game=game,
                                state_encoder=state_encoder,
                                nn=net,
                                config=config)
    trainer = AlphaZeroTrainer(game=game,
                               state_encoder=state_encoder,
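# Both scripts above load their hyperparameters from tictactoe.yaml. The file
# itself is not part of this excerpt; an illustrative version, with keys
# inferred from the config lookups in these scripts and placeholder values,
# might look like:
#
#     log_dir: logs/tictactoe
#     nn_arch: dual_resnet
#     game_size: 3
#     num_history: 1
#     batch_size: 64
#     num_simulations: 50
#     c_puct: 1.0
#     num_resnet_filters: 256
#     value_head_hidden_dim: 256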