import logging

import torch

from alphazero.agents.random import RandomPlayAgent
from alphazero.alphazero.nn_modules.nets import dual_resnet, resnet
from alphazero.alphazero.state_encoders.ttt_state_encoder import TicTacToeStateEncoder
from alphazero.games.tictactoe import TicTacToeGame
from alphazero.util.logging_config import setup_logger
from alphazero.util.pit_agents import pit

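# Names of the trained model variants to evaluate against the random-play baseline,
# and the number of evaluation games to play per variant.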
MODELS = [
    'dualres_comp',
    'dualres_nocomp',
    'res_comp',
    'res_nocomp',
]
NUM_GAMES = 100

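# Set up file logging and pick the compute device (GPU if available).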
setup_logger('experiment_logs', 'ttt_comp_random.log')
logger = logging.getLogger(__name__)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logger.info('Training on %s', device)
game = TicTacToeGame()
state_encoder = TicTacToeStateEncoder(device)

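# MCTS and network hyperparameters for the 3x3 Tic-Tac-Toe setup.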
config = {
    'num_simulations': 50,
    'c_puct': 1.,
    'game_size': 3,
    'num_history': 1,
    'num_resnet_filters': 256,
    'value_head_hidden_dim': 256,
    'device': device
}
Example #2
import logging

import torch
import yaml
from torchsummary import summary

import alphazero.alphazero.nn_modules.nets as nets
from alphazero.alphazero.mcts import MonteCarloTreeSearch
from alphazero.alphazero.state_encoders.gomoku_state_encoder import GomokuStateEncoder
from alphazero.alphazero.trainer import AlphaZeroTrainer
from alphazero.games.gomoku import GomokuGame
from alphazero.util.logging_config import setup_logger

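# Load all training hyperparameters from the YAML config file.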
with open('gomoku.yaml', 'r') as f:
    config = yaml.safe_load(f)

config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

setup_logger(config['log_dir'], 'train.log')
logger = logging.getLogger(__name__)
logger.info('** Training on %s **', config['device'])
logger.info(config)

if __name__ == '__main__':
    game = GomokuGame(config['game_size'])
    state_encoder = GomokuStateEncoder(config['device'], config['num_history'])

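    # Look up the network architecture named in the config (e.g. 'dual_resnet' or 'resnet')
    # and build it for this game.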
    net = getattr(nets, config['nn_arch'])(game, config)

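    # Print a per-layer summary; torchsummary takes the per-sample input shape
    # (history planes, board height, board width).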
    summary(net,
            input_size=(config['num_history'], config['game_size'],
                        config['game_size']),
            batch_size=config['batch_size'])