def _selfplay(n):
    chessenv = Game()
    memory = Memory(config.MEMORY_SIZE)

    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                              (119,) + chessenv.grid_shape, chessenv.action_size,
                              config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           (119,) + chessenv.grid_shape, chessenv.action_size,
                           config.HIDDEN_CNN_LAYERS)
    best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', chessenv.state_size, chessenv.action_size,
                           config.MCTS_SIMS, config.CPUCT, current_NN)
    best_player = Agent('best_player', chessenv.state_size, chessenv.action_size,
                        config.MCTS_SIMS, config.CPUCT, best_NN)

    t0 = time.perf_counter()
    print('Proc {0} start'.format(n))
    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    t1 = time.perf_counter() - t0
    print('Proc {0} done in {1} seconds'.format(n, t1))
    return memory
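# A minimal sketch (not from the original source) of fanning _selfplay out
# across worker processes with multiprocessing.Pool and merging the returned
# memories. It assumes Memory exposes the `ltmemory` container used by the
# training loops below; adjust if the real Memory API differs.
from multiprocessing import Pool

def parallel_selfplay(n_procs=4):
    with Pool(processes=n_procs) as pool:
        partial_memories = pool.map(_selfplay, range(n_procs))
    merged = Memory(config.MEMORY_SIZE)
    for mem in partial_memories:
        merged.ltmemory.extend(mem.ltmemory)  # results come back pickled per process
    return merged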
def play_one_round():
    global firstRun
    global player1
    global player2

    go_first = input('Do you want to go first (y/n)?')
    if go_first.lower() == "y":
        isFirst = 1
    else:
        isFirst = -1

    player1_version = -1
    player2_version = ai_player_version

    if firstRun:
        firstRun = False
        _, _, _, _, player1, player2 = playMatchesBetweenVersions(
            env, run_version, player1_version, player2_version,
            episodes, lg.logger_play_game, 0, isFirst)
    else:
        playMatches(player1, player2, episodes, lg.logger_play_game,
                    0, None, isFirst)
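# Hypothetical driver for play_one_round, assuming the remaining module
# globals it reads (env, run_version, episodes, ai_player_version) are
# already configured as in the surrounding scripts.
firstRun = True
player1 = None
player2 = None

while input('Play a round (y/n)? ').lower() == 'y':
    play_one_round()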
while 1:
    iteration += 1
    reload(lg)
    reload(config)

    print('ITERATION NUMBER ' + str(iteration))
    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    # SELF PLAY
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    print('\n')

    memory.clear_stmemory()

    if len(memory.ltmemory) >= config.MEMORY_SIZE:
        # RETRAINING
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        if iteration % 5 == 0:
            # completed from the identical checkpoint in do_train below
            pickle.dump(memory, open(run_folder + "memory/memory"
                                     + str(iteration).zfill(4) + ".p", "wb"))
else:
    m_tmp = player1_NN.read(initialise.INITIAL_RUN_NUMBER, player_idx + 1)
    player1_NN.model.set_weights(m_tmp.get_weights())
    player1 = Agent('player1', env.state_size, env.action_size,
                    config.MCTS_SIMS, config.CPUCT, player1_NN)

    m_tmp = player2_NN.read(initialise.INITIAL_RUN_NUMBER, opponent_idx + 1)
    player2_NN.model.set_weights(m_tmp.get_weights())
    player2 = Agent('player2', env.state_size, env.action_size,
                    config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = funcs.playMatches(
        player1, player2, EPISODES, lg.logger_main,
        turns_until_tau0=0, goes_first=1)

    print('\n')
    print('-------')
    print('player1: version {}'.format(player_idx))
    print('player2: version {}'.format(opponent_idx))
    print('\nSCORES')
    print(scores)
    print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
    print(sp_scores)
    print(points)

    points_dict[player_idx] += sum(points[player1.name])
    points_dict[opponent_idx] += sum(points[player2.name])

    plt.figure()
def evaluation_worker(conn):
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    import initialise
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from game import Game
    from agent import Agent
    from memory import Memory
    from funcs import playMatches
    from settings import run_folder  # needed for the checkpoint path below
    import loggers as lg
    import logging
    import pickle  # needed for pickle.dump below
    import time

    # initialise new test memory
    test_memories = Memory(int(MEMORY_SIZE / 10))

    env = Game()

    # initialise new models
    # create untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1,) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                                  int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    else:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                                  PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    current_player_version = 0
    best_player_version = 0

    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION is not None:
        best_player_version = initialise.INITIAL_MODEL_VERSION
        # print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
        m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                             initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())
        best_NN.model.set_weights(m_tmp.get_weights())
    # otherwise just ensure the weights on the two players are the same
    else:
        best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', config.MCTS_SIMS, config.CPUCT, current_NN)
    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)

    time.sleep(20)

    while 1:
        # request current_NN weights
        conn.send(current_player_version)
        # wait indefinitely for current_NN weights
        conn.poll(None)
        data = conn.recv()

        if data:
            # set current_NN weights
            current_NN.model.set_weights(data)
            current_player_version += 1

            # play tournament games
            tourney_players = []
            if TEAM_SIZE > 1:
                for i in range(int(PLAYER_COUNT / TEAM_SIZE)):  # for each team
                    # alternate best_players and current_players up to TEAM_SIZE
                    for k in range(TEAM_SIZE):
                        if k % 2 == 0:
                            tourney_players.append(best_player)
                        else:
                            tourney_players.append(current_player)
            else:
                for i in range(PLAYER_COUNT):
                    if i % 2 == 0:
                        tourney_players.append(best_player)
                    else:
                        tourney_players.append(current_player)

            scores, test_memories = playMatches(tourney_players, config.EVAL_EPISODES,
                                                lg.logger_tourney, 0.0, test_memories,
                                                evaluation=True)
            test_memories.clear_stmemory()

            # if the current player is significantly better than the best_player,
            # replace the best player; the replacement is made by copying the
            # weights of current_player's network to best_player's network
            if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
                # if current_NN won, send message
                conn.send(((current_player_version, best_player_version), str(scores)))
                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)

            if (len(test_memories.ltmemory) == test_memories.MEMORY_SIZE
                    and current_player_version % 5 == 0):
                # was `memories`, which is undefined in this worker
                pickle.dump(test_memories,
                            open(run_folder + "memory/test_memory"
                                 + str(current_player_version).zfill(4) + ".p", "wb"))

            # print("Evaluating performance of current_NN")
            # current_player.evaluate_accuracy(test_memories.ltmemory)
            # print('\n')
        else:
            time.sleep(10)
bestPlayer = Agent(bestNN)

os.makedirs(os.path.dirname('../memory/version0.p'), exist_ok=True)

if initialMemory is None:
    memory = Memory()
else:
    print(f'Loading memory version {initialMemory}...')
    memory = pickle.load(open(f'../memory/version{initialMemory}.p', "rb"))

iteration = 0
while True:
    iteration += 1
    print(f'Iteration {iteration}:')

    print("Playing matches...")
    playMatches(bestPlayer, bestPlayer, config.EPISODES,
                config.TURNS_UNTIL_TAU0, memory)
    pickle.dump(memory, open(f'../memory/version{iteration}.p', "wb"))

    if len(memory.longTerm) >= config.MEMORY_SIZE:
        print("Retraining...")
        currentPlayer.replay(memory.longTerm)

        print("Tournament...")
        scores = playMatches(currentPlayer, bestPlayer,
                             config.EVAL_EPISODES, 0, memory)
        print("Scores:")
        print(scores)

        if scores["player1"] > scores["player2"] * config.SCORING_THRESHOLD:
            bestPlayerVersion += 1
            bestNN.model.set_weights(currentNN.model.get_weights())
            bestNN.write(f'../models/version{bestPlayerVersion}.h5')
def do_train(iteration, current_player, best_player, best_player_version,
             current_NN, best_NN, memory):
    while 1:
        iteration += 1
        reload(lg)
        reload(config)

        print('ITERATION NUMBER ' + str(iteration))
        lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
        print('BEST PLAYER VERSION ' + str(best_player_version))

        ######## SELF PLAY ########
        print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
        _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                                      lg.logger_main,
                                      turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                      memory=memory)
        print('\n')

        memory.clear_stmemory()

        if len(memory.ltmemory) >= config.MEMORY_SIZE:
            ######## RETRAINING ########
            print('RETRAINING...')
            current_player.replay(memory.ltmemory)
            print('')

            if iteration % 5 == 0:
                pickle.dump(memory, open(run_folder + "memory/memory"
                                         + str(iteration).zfill(4) + ".p", "wb"))

            lg.logger_memory.info('====================')
            lg.logger_memory.info('NEW MEMORIES')
            lg.logger_memory.info('====================')

            memory_samp = random.sample(memory.ltmemory, min(1000, len(memory.ltmemory)))

            for s in memory_samp:
                current_value, current_probs, _ = current_player.get_preds(s['state'])
                best_value, best_probs, _ = best_player.get_preds(s['state'])

                lg.logger_memory.info('MCTS VALUE FOR %s: %f', s['playerTurn'], s['value'])
                lg.logger_memory.info('CUR PRED VALUE FOR %s: %f', s['playerTurn'], current_value)
                lg.logger_memory.info('BES PRED VALUE FOR %s: %f', s['playerTurn'], best_value)
                lg.logger_memory.info('THE MCTS ACTION VALUES: %s',
                                      ['%.2f' % elem for elem in s['AV']])
                lg.logger_memory.info('CUR PRED ACTION VALUES: %s',
                                      ['%.2f' % elem for elem in current_probs])
                lg.logger_memory.info('BES PRED ACTION VALUES: %s',
                                      ['%.2f' % elem for elem in best_probs])
                lg.logger_memory.info('ID: %s', s['state'].id)
                lg.logger_memory.info('INPUT TO MODEL: %s',
                                      current_player.model.convertToModelInput(s['state']))

                s['state'].render(lg.logger_memory)

            ######## TOURNAMENT ########
            print('TOURNAMENT...')
            scores, _, points, sp_scores = playMatches(best_player, current_player,
                                                       config.EVAL_EPISODES,
                                                       lg.logger_tourney,
                                                       turns_until_tau0=0, memory=None)
            print('\nSCORES')
            print(scores)
            print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
            print(sp_scores)
            # print(points)
            print('\n\n')

            if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)
        else:
            print('MEMORY SIZE: ' + str(len(memory.ltmemory)))
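# Hypothetical entry point for do_train, assuming the players, networks, and
# starting version are built as in the surrounding snippets (all names here
# are assumptions, not part of the original source).
if __name__ == '__main__':
    memory = Memory(config.MEMORY_SIZE)
    do_train(0, current_player, best_player, best_player_version,
             current_NN, best_NN, memory)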
def self_play_worker(conn):
    import os
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    from memory import Memory
    from settings import run_folder, run_archive_folder
    import initialise
    from game import Game, GameState
    from agent import Agent
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from shutil import copyfile
    from funcs import playMatches
    import loggers as lg
    import logging
    import random

    env = Game()

    ######## LOAD MODEL IF NECESSARY ########
    # create untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1,) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                                   int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    else:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                                   PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    best_player_version = 0
    best_NN.model.set_weights(opponent_NN.model.get_weights())

    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)
    opponent_player = Agent('selected_opponent', config.MCTS_SIMS, config.CPUCT, opponent_NN)

    if initialise.INITIAL_ITERATION is not None:
        iteration = initialise.INITIAL_ITERATION
    else:
        iteration = 0

    memories = Memory(150 * config.EPISODES)

    while 1:
        iteration += 1

        # request best_NN weights
        conn.send(best_player_version)
        # wait indefinitely for best_NN weights
        conn.poll(None)
        data = conn.recv()
        # print('received: {}'.format(data))

        # if the weights differ, set weights
        if data:
            best_NN.model.set_weights(data[1])
            best_player_version = data[0]

        if len(memories.ltmemory) != 0:
            # send new memories (skip first loop)
            conn.send(memories.ltmemory)
            memories = Memory(150 * config.EPISODES)

        ######## CREATE LIST OF PLAYERS ########
        # for training it is just two copies of best_player vs. two copies of
        # another randomly selected model
        filenames = os.listdir('run/models/')
        filenames = [name for name in filenames if name[-3:] == '.h5']

        if filenames:
            opponent = random.choice(filenames)
            m_tmp = opponent_NN.read_specific('run/models/' + opponent)
            opponent_NN.model.set_weights(m_tmp.get_weights())

            self_play_players = []
            for i in range(PLAYER_COUNT):
                if i % 2 == 0:
                    self_play_players.append(best_player)
                else:
                    self_play_players.append(opponent_player)
        else:
            self_play_players = []
            for i in range(PLAYER_COUNT):
                self_play_players.append(best_player)

        # print("Version {} randomly selected to play against version {}".format(
        #     int(opponent[-7:-3]), best_player_version))

        ######## SELF PLAY ########
        # epsilon = init_epsilon - iteration * (init_epsilon / 50.0)
        epsilon = 0
        # print('Current epsilon: {}'.format(epsilon))
        print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
        _, memories = playMatches(self_play_players, config.EPISODES,
                                  lg.logger_main, epsilon, memory=memories)
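# Hedged sketch (not from the original source) of the trainer side of the
# self_play_worker pipe protocol, inferred from the worker loop above: reply
# with a (version, weights) tuple when the worker is stale, otherwise a falsy
# value, then collect any memories the worker ships back. `trainer_version`
# and `best_NN` stand in for the trainer's own state and are assumptions.
from multiprocessing import Pipe, Process

if __name__ == '__main__':
    parent_conn, child_conn = Pipe()
    Process(target=self_play_worker, args=(child_conn,), daemon=True).start()
    while True:
        worker_version = parent_conn.recv()    # worker announces its version
        if worker_version < trainer_version:
            parent_conn.send((trainer_version, best_NN.model.get_weights()))
        else:
            parent_conn.send(None)
        if parent_conn.poll(1):                # worker may ship new memories
            new_memories = parent_conn.recv()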
iteration += 1
# reload(lg)
# reload(config)

print('ITERATION NUMBER ' + str(iteration))
# lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
print('BEST PLAYER VERSION ' + str(best_player_version))

######## SELF PLAY ########
print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
_, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, None,
                              turns_until_tau0=config.TURNS_UNTIL_TAU0,
                              memory=memory, board_size=config.BOARD_SIZE)
print('\n')

memory.clear_stmemory()

if len(memory.ltmemory) >= config.MEMORY_SIZE:
    ######## RETRAINING ########
    print('RETRAINING...')
    current_player.replay(memory.ltmemory)
    print('')

    if iteration % 5 == 0:
        # completed from the identical checkpoint in do_train above
        pickle.dump(memory, open(run_folder + "memory/memory"
                                 + str(iteration).zfill(4) + ".p", "wb"))
import random
from importlib import reload
from shutil import copyfile  # needed for the config copy below

from game import Game, GameState
from agent import Agent
from memory import Memory
from funcs import playMatches
import loggers as lg
from settings import run_folder, run_archive_folder
import initialise
import pickle
import config

env = Game()  # needed for env.name in the archive path below

# If loading an existing neural network, copy the config file to root
if initialise.INITIAL_RUN_NUMBER is not None:
    copyfile(run_archive_folder + env.name + '/run'
             + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py',
             './config.py')

print('\n')

######## CREATE THE PLAYERS ########
iteration = 0

while 1:
    iteration += 1
    print('ITERATION NUMBER ' + str(iteration))
    _, _, _ = playMatches(config.EPISODES, lg.logger_main,
                          turns_until_tau0=config.TURNS_UNTIL_TAU0)
    print('\n')
# -*- coding: utf-8 -*-
# %matplotlib inline
import numpy as np
np.set_printoptions(suppress=True)

from shutil import copyfile
import random
from importlib import reload

from game import Game, GameState
from agent import Agent
from funcs import playMatches
from settings import run_folder, run_archive_folder
import pickle
import config

print('\n')

######## CREATE THE PLAYERS ########
iteration = 0

while 1:
    iteration += 1
    print('ITERATION NUMBER ' + str(iteration))
    _, _, _ = playMatches(config.EPISODES)
    print('\n')
iteration = 0

while 1:
    iteration += 1
    reload(lg)
    reload(config)

    print('ITERATION NUMBER ' + str(iteration))
    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## SELF PLAY ########
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    print('\n')

    memory.clear_stmemory()

    if len(memory.ltmemory) >= config.MEMORY_SIZE:
        ######## RETRAINING ########
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        if iteration % 5 == 0:
            # str() cast fixes a TypeError: the original concatenated an int
            pickle.dump(memory, open(run_folder + "memory/memory"
                                     + str(iteration) + ".p", "wb"))

        lg.logger_memory.info('====================')
run_version = 1
player1version = 10
player2version = 50
EPISODES = 7
logger = loggers.logger_tourney
turns_until_tau0 = 0

env = Game()

# Use two separate networks: loading player2's weights into the same network
# object would silently leave both agents playing with player2's weights.
network1 = ResCNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,
                  env.action_size, config.HIDDEN_CNN_LAYERS)
network1.load(env.name, run_version, player1version)
player1 = Agent('player1', env.state_size, env.action_size,
                config.MCTS_SIMS, config.CPUCT, network1)

network2 = ResCNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,
                  env.action_size, config.HIDDEN_CNN_LAYERS)
network2.load(env.name, run_version, player2version)
player2 = Agent('player2', env.state_size, env.action_size,
                config.MCTS_SIMS, config.CPUCT, network2)

print('Players are ready, Tourney begins!')

goes_first = 0
scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES,
                                                logger, turns_until_tau0,
                                                None, goes_first)

print(scores)
print(points)
print(sp_scores)