from alphazero.agents.random import RandomPlayAgent
from alphazero.alphazero.nn_modules.nets import dual_resnet, resnet
from alphazero.alphazero.state_encoders.ttt_state_encoder import (
    TicTacToeStateEncoder,
)
from alphazero.games.tictactoe import TicTacToeGame
from alphazero.util.logging_config import setup_logger
from alphazero.util.pit_agents import pit

# Experiment constants.
# NOTE(review): the MODELS entries look like saved-model identifiers
# (dual-head vs. plain resnet, "comp" vs. "nocomp") -- confirm against the
# code that consumes them.
MODELS = [
    'dualres_comp',
    'dualres_nocomp',
    'res_comp',
    'res_nocomp',
]
NUM_GAMES = 100

# Logging is configured through the project helper before the module logger
# is obtained.
setup_logger('experiment_logs', 'ttt_comp_random.log')
logger = logging.getLogger(__name__)

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
logger.info('Training on %s', device)

game = TicTacToeGame()
state_encoder = TicTacToeStateEncoder(device)

# Settings dictionary for this experiment; 'device' mirrors the value
# selected above.
config = {
    'num_simulations': 50,
    'c_puct': 1.0,
    'game_size': 3,
    'num_history': 1,
    'num_resnet_filters': 256,
    'value_head_hidden_dim': 256,
    'device': device,
}
import yaml
from torchsummary import summary

import alphazero.alphazero.nn_modules.nets as nets
from alphazero.alphazero.mcts import MonteCarloTreeSearch
from alphazero.alphazero.state_encoders.gomoku_state_encoder import (
    GomokuStateEncoder,
)
from alphazero.alphazero.trainer import AlphaZeroTrainer
from alphazero.games.gomoku import GomokuGame
from alphazero.util.logging_config import setup_logger

# Load the run configuration from gomoku.yaml in the current working
# directory. The encoding is given explicitly so the file is decoded the same
# way on every platform instead of depending on the locale default.
with open('gomoku.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# The device is decided at runtime and stored in the config: CUDA when torch
# reports it as available, CPU otherwise.
config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

# Logging is configured through the project helper, using the log directory
# named in the config, before the module logger is obtained.
setup_logger(config['log_dir'], 'train.log')
logger = logging.getLogger(__name__)
logger.info('** Training on %s **', config['device'])
logger.info(config)

if __name__ == '__main__':
    game = GomokuGame(config['game_size'])
    state_encoder = GomokuStateEncoder(config['device'], config['num_history'])

    # The network constructor is looked up by name in the nets module, so the
    # architecture is chosen by the 'nn_arch' config entry.
    net = getattr(nets, config['nn_arch'])(game, config)

    # Print a layer-by-layer summary for an input of
    # (num_history, game_size, game_size) at the configured batch size.
    summary(
        net,
        input_size=(
            config['num_history'],
            config['game_size'],
            config['game_size'],
        ),
        batch_size=config['batch_size'],
    )