def _selfplay(n):
    chessenv = Game()
    memory = Memory(config.MEMORY_SIZE)
    # Two identical residual CNNs; (119,) prepends the AlphaZero-style plane
    # count for chess to the board's grid shape.
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                              (119, ) + chessenv.grid_shape,
                              chessenv.action_size, config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           (119, ) + chessenv.grid_shape, chessenv.action_size,
                           config.HIDDEN_CNN_LAYERS)
    # Start best and current from the same weights.
    best_NN.model.set_weights(current_NN.model.get_weights())
    current_player = Agent('current_player', chessenv.state_size,
                           chessenv.action_size, config.MCTS_SIMS,
                           config.CPUCT, current_NN)
    best_player = Agent('best_player', chessenv.state_size,
                        chessenv.action_size, config.MCTS_SIMS, config.CPUCT,
                        best_NN)

    t0 = time.perf_counter()
    print('Proc {0} start'.format(n))
    # Self-play: the best player plays itself to fill the replay memory.
    _, memory, _, _ = playMatches(best_player,
                                  best_player,
                                  config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    t1 = time.perf_counter() - t0
    print('Proc {0} done in {1} seconds'.format(n, t1))
    return memory
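Most snippets on this page share a similar playMatches contract (a few later examples use variants, e.g. a single list of players). A minimal illustrative stub of the four-value form used above, assuming names from the snippets rather than the real funcs.py:

# Illustrative stub only: the real playMatches lives in funcs.py and plays
# full MCTS-guided games. This just documents the assumed call signature and
# return shape (scores, memory, points, sp_scores).
def playMatches(player1, player2, EPISODES, logger,
                turns_until_tau0=0, memory=None, goes_first=0):
    scores = {player1.name: 0, 'drawn': 0, player2.name: 0}
    sp_scores = {'sp': 0, 'drawn': 0, 'nsp': 0}  # starting / non-starting player
    points = {player1.name: [], player2.name: []}
    for e in range(EPISODES):
        # play one game here, appending positions to memory and updating
        # scores, points and sp_scores
        pass
    return scores, memory, points, sp_scores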
Example No. 2
def play_one_round():
    global firstRun
    global player1
    global player2
    go_first = input('Do you want to go first (y/n)? ')
    isFirst = 1 if go_first.lower() == 'y' else -1
    player1_version = -1  # -1 selects the human (User) player
    player2_version = ai_player_version

    if firstRun:
        firstRun = False
        _, _, _, _, player1, player2 = playMatchesBetweenVersions(
            env, run_version, player1_version, player2_version, episodes,
            lg.logger_play_game, 0, isFirst)
    else:
        playMatches(player1, player2, episodes, lg.logger_play_game, 0, None,
                    isFirst)
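Example No. 3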
while 1:

    iteration += 1
    reload(lg)
    reload(config)

    print('ITERATION NUMBER ' + str(iteration))

    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    # SELF PLAY
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(best_player,
                                  best_player,
                                  config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    print('\n')

    memory.clear_stmemory()

    if len(memory.ltmemory) >= config.MEMORY_SIZE:

        # RETRAINING
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        if iteration % 5 == 0:
            pickle.dump(
                memory,
                open(
                    run_folder + "memory/memory" +
                    str(iteration).zfill(4) + ".p", "wb"))
Example No. 4
        else:
            m_tmp = player1_NN.read(initialise.INITIAL_RUN_NUMBER,
                                    player_idx + 1)
            player1_NN.model.set_weights(m_tmp.get_weights())
            player1 = Agent('player1', env.state_size, env.action_size,
                            config.MCTS_SIMS, config.CPUCT, player1_NN)
            m_tmp = player2_NN.read(initialise.INITIAL_RUN_NUMBER,
                                    opponent_idx + 1)
            player2_NN.model.set_weights(m_tmp.get_weights())
            player2 = Agent('player2', env.state_size, env.action_size,
                            config.MCTS_SIMS, config.CPUCT, player2_NN)

            scores, memory, points, sp_scores = funcs.playMatches(
                player1,
                player2,
                EPISODES,
                lg.logger_main,
                turns_until_tau0=0,
                goes_first=1)
            print('\n')
            print('-------')
            print('player1: version {}'.format(player_idx))
            print('player2: version {}'.format(opponent_idx))
            print('\nSCORES')
            print(scores)
            print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
            print(sp_scores)
            print(points)
            points_dict[player_idx] += sum(points[player1.name])
            points_dict[opponent_idx] += sum(points[player2.name])
            plt.figure()
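Example No. 5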
def evaluation_worker(conn):
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    import initialise
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from game import Game
    from agent import Agent
    from memory import Memory
    from funcs import playMatches
    from settings import run_folder
    import loggers as lg
    import logging
    import pickle
    import time

    # initialise new test memory
    test_memories = Memory(int(MEMORY_SIZE / 10))

    env = Game()

    # initialise new models
    # create an untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1, ) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  shape, int(PLAYER_COUNT / TEAM_SIZE),
                                  config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE),
                               config.HIDDEN_CNN_LAYERS)
    else:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  shape, PLAYER_COUNT,
                                  config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    current_player_version = 0
    best_player_version = 0
    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION is not None:
        best_player_version = initialise.INITIAL_MODEL_VERSION
        #print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
        m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                             initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())
        best_NN.model.set_weights(m_tmp.get_weights())
    # otherwise just ensure the weights on the two players are the same
    else:
        best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', config.MCTS_SIMS, config.CPUCT,
                           current_NN)
    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)

    time.sleep(20)

    while 1:
        # request current_NN weights
        conn.send(current_player_version)
        # wait indefinitely for current_NN weights
        conn.poll(None)
        data = conn.recv()

        if data:

            # set current_NN weights
            current_NN.model.set_weights(data)
            current_player_version += 1

            # play tournament games
            tourney_players = []
            if TEAM_SIZE > 1:
                for i in range(int(PLAYER_COUNT / TEAM_SIZE)):  # for each team
                    # alternate best_player and current_player within each team
                    for k in range(TEAM_SIZE):
                        if k % 2 == 0:
                            tourney_players.append(best_player)
                        else:
                            tourney_players.append(current_player)
            else:
                for i in range(PLAYER_COUNT):
                    if i % 2 == 0:
                        tourney_players.append(best_player)
                    else:
                        tourney_players.append(current_player)

            scores, test_memories = playMatches(tourney_players,
                                                config.EVAL_EPISODES,
                                                lg.logger_tourney,
                                                0.0,
                                                test_memories,
                                                evaluation=True)
            test_memories.clear_stmemory()

            # If the current player is significantly better than the best player,
            # replace it by copying current_player's network weights into best_player's.
            if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
                # if current_NN won send message
                conn.send(((current_player_version, best_player_version),
                           str(scores)))

                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)

            if (len(test_memories.ltmemory) == test_memories.MEMORY_SIZE
                    and current_player_version % 5 == 0):
                pickle.dump(
                    test_memories,
                    open(
                        run_folder + "memory/test_memory" +
                        str(current_player_version).zfill(4) + ".p", "wb"))

                #print("Evaluating performance of current_NN")
                #current_player.evaluate_accuracy(test_memories.ltmemory)
                #print('\n')
        else:
            time.sleep(10)
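evaluation_worker speaks a small pipe protocol: send the current version number, block for fresh weights (or a falsy reply), and occasionally send a promotion report. A sketch of the parent side of that pipe, assuming hypothetical latest_version/latest_weights state fed by a trainer:

# Hypothetical parent-side wiring for evaluation_worker; the actual
# orchestration script is not part of the snippet above.
from multiprocessing import Pipe, Process

latest_version = 0     # stand-ins for state a training process would keep
latest_weights = None

parent_conn, child_conn = Pipe()
proc = Process(target=evaluation_worker, args=(child_conn,))
proc.start()

while True:
    msg = parent_conn.recv()
    if isinstance(msg, int):
        # The worker asked for weights newer than its version `msg`.
        if latest_weights is not None and latest_version > msg:
            parent_conn.send(latest_weights)
        else:
            parent_conn.send(False)  # nothing new yet; the worker retries
    else:
        # The worker reported a promotion:
        # ((current_player_version, best_player_version), scores)
        print('promotion:', msg)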
Example No. 6
bestPlayer = Agent(bestNN)

os.makedirs(os.path.dirname('../memory/version0.p'), exist_ok=True)
if initialMemory is None:
    memory = Memory()
else:
    print(f'Loading memory version {initialMemory}...')
    memory = pickle.load(open(f'../memory/version{initialMemory}.p', "rb"))

iteration = 0

while True:
    iteration += 1
    print(f'Iteration {iteration}:')
    print("Playing matches...")
    playMatches(bestPlayer, bestPlayer, config.EPISODES, config.TURNS_UNTIL_TAU0, memory)

    pickle.dump(memory, open(f'../memory/version{iteration}.p', "wb"))

    if len(memory.longTerm) >= config.MEMORY_SIZE:
        print("Retraining...")
        currentPlayer.replay(memory.longTerm)
        
        print("Tournament...")
        scores = playMatches(currentPlayer, bestPlayer, config.EVAL_EPISODES, 0, memory)
        print("Scores:")
        print(scores)
        if(scores["player1"] > scores["player2"] * config.SCORING_THRESHOLD):
            bestPlayerVersion += 1
            bestNN.model.set_weights(currentNN.model.get_weights())
            bestNN.write(f'../models/version{bestPlayerVersion}.h5')
Example No. 7
def do_train(iteration, current_player, best_player, best_player_version, current_NN, best_NN, memory):
    while 1:

        iteration += 1
        reload(lg)
        reload(config)

        print('ITERATION NUMBER ' + str(iteration))

        lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
        print('BEST PLAYER VERSION ' + str(best_player_version))

        ######## SELF PLAY ########
        print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
        _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, lg.logger_main,
                                      turns_until_tau0=config.TURNS_UNTIL_TAU0, memory=memory)
        print('\n')

        memory.clear_stmemory()

        if len(memory.ltmemory) >= config.MEMORY_SIZE:

            ######## RETRAINING ########
            print('RETRAINING...')
            current_player.replay(memory.ltmemory)
            print('')

            if iteration % 5 == 0:
                pickle.dump(memory, open(run_folder + "memory/memory" + str(iteration).zfill(4) + ".p", "wb"))

            lg.logger_memory.info('====================')
            lg.logger_memory.info('NEW MEMORIES')
            lg.logger_memory.info('====================')

            memory_samp = random.sample(memory.ltmemory, min(1000, len(memory.ltmemory)))

            for s in memory_samp:
                current_value, current_probs, _ = current_player.get_preds(s['state'])
                best_value, best_probs, _ = best_player.get_preds(s['state'])

                lg.logger_memory.info('MCTS VALUE FOR %s: %f', s['playerTurn'], s['value'])
                lg.logger_memory.info('CUR PRED VALUE FOR %s: %f', s['playerTurn'], current_value)
                lg.logger_memory.info('BES PRED VALUE FOR %s: %f', s['playerTurn'], best_value)
                lg.logger_memory.info('THE MCTS ACTION VALUES: %s', ['%.2f' % elem for elem in s['AV']])
                lg.logger_memory.info('CUR PRED ACTION VALUES: %s', ['%.2f' % elem for elem in current_probs])
                lg.logger_memory.info('BES PRED ACTION VALUES: %s', ['%.2f' % elem for elem in best_probs])
                lg.logger_memory.info('ID: %s', s['state'].id)
                lg.logger_memory.info('INPUT TO MODEL: %s', current_player.model.convertToModelInput(s['state']))

                s['state'].render(lg.logger_memory)

            ######## TOURNAMENT ########
            print('TOURNAMENT...')
            scores, _, points, sp_scores = playMatches(best_player, current_player, config.EVAL_EPISODES,
                                                       lg.logger_tourney, turns_until_tau0=0, memory=None)
            print('\nSCORES')
            print(scores)
            print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
            print(sp_scores)
            # print(points)

            print('\n\n')

            if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)

        else:
            print('MEMORY SIZE: ' + str(len(memory.ltmemory)))
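The tournament result feeds a plain ratio gate: current replaces best only when its score beats the best player's score scaled by the threshold. A worked sketch, with SCORING_THRESHOLD = 1.3 assumed (the real value comes from config.py):

SCORING_THRESHOLD = 1.3  # assumed; read from config.py in the examples above

def should_promote(scores):
    # Draws are recorded under a separate 'drawn' key, so they help neither side.
    return scores['current_player'] > scores['best_player'] * SCORING_THRESHOLD

print(should_promote({'current_player': 12, 'drawn': 0, 'best_player': 8}))  # True:  12 > 8 * 1.3 = 10.4
print(should_promote({'current_player': 10, 'drawn': 2, 'best_player': 8}))  # False: 10 <= 10.4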
Example No. 8
def self_play_worker(conn):
    import os
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    from memory import Memory
    from settings import run_folder, run_archive_folder
    import initialise
    from game import Game, GameState
    from agent import Agent
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from shutil import copyfile
    from funcs import playMatches
    import loggers as lg
    import logging
    import random

    env = Game()

    ######## LOAD MODEL IF NECESSARY ########
    # create an untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1, ) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE),
                               config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                   shape, int(PLAYER_COUNT / TEAM_SIZE),
                                   config.HIDDEN_CNN_LAYERS)
    else:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                   shape, PLAYER_COUNT,
                                   config.HIDDEN_CNN_LAYERS)

    best_player_version = 0
    best_NN.model.set_weights(opponent_NN.model.get_weights())

    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)
    opponent_player = Agent('selected_opponent', config.MCTS_SIMS,
                            config.CPUCT, opponent_NN)

    if initialise.INITIAL_ITERATION is not None:
        iteration = initialise.INITIAL_ITERATION
    else:
        iteration = 0

    memories = Memory(150 * config.EPISODES)
    while 1:
        iteration += 1

        # request best_NN weights
        conn.send(best_player_version)
        # wait indefinitely for best_NN weights
        conn.poll(None)
        data = conn.recv()
        #print('received: {}'.format(data))

        # if weights different set weights
        if data:
            best_NN.model.set_weights(data[1])
            best_player_version = data[0]

        if len(memories.ltmemory) != 0:  # send new memories (skip first loop)
            conn.send(memories.ltmemory)

        memories = Memory(150 * config.EPISODES)
        ######## CREATE LIST OF PLAYERS #######
        # for training it is just 2 copies of the best_player vs. 2 copies of another randomly selected model
        filenames = os.listdir('run/models/')
        filenames = [name for name in filenames if name.endswith('.h5')]

        if filenames:
            opponent = random.choice(filenames)
            m_tmp = opponent_NN.read_specific('run/models/' + opponent)
            opponent_NN.model.set_weights(m_tmp.get_weights())

            self_play_players = []
            for i in range(PLAYER_COUNT):
                if i % 2 == 0:
                    self_play_players.append(best_player)
                else:
                    self_play_players.append(opponent_player)
        else:
            self_play_players = []
            for i in range(PLAYER_COUNT):
                self_play_players.append(best_player)

        #print("Version {} randomly selected to play against version {}".format(int(opponent[-7:-3]), best_player_version))

        ######## SELF PLAY ########
        #epsilon = init_epsilon - iteration * (init_epsilon / 50.0)
        epsilon = 0

        #print('Current epsilon: {}'.format(epsilon))
        print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
        _, memories = playMatches(self_play_players,
                                  config.EPISODES,
                                  lg.logger_main,
                                  epsilon,
                                  memory=memories)
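self_play_worker drives the same pipe protocol from the other side: announce best_player_version, wait for a (version, weights) tuple or a falsy reply, then ship the previous round's memories before self-playing. A trainer-side handler sketched under those assumptions (get_best and the replay buffer are hypothetical):

# Hypothetical trainer-side handler for self_play_worker's pipe protocol.
def serve_self_play(conn, get_best, replay_buffer):
    # get_best() is assumed to return (best_version, best_weights).
    while True:
        worker_version = conn.recv()          # worker announces its version
        best_version, best_weights = get_best()
        if best_version > worker_version:
            conn.send((best_version, best_weights))  # worker calls set_weights
        else:
            conn.send(False)                  # worker keeps its current weights
        # After the first round the worker sends its ltmemory before self-play.
        if conn.poll(5):
            replay_buffer.extend(conn.recv())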
Example No. 9
    iteration += 1
    # reload(lg)
    # reload(config)

    print('ITERATION NUMBER ' + str(iteration))

    # lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## SELF PLAY ########
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(best_player,
                                  best_player,
                                  config.EPISODES,
                                  None,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory,
                                  board_size=config.BOARD_SIZE)
    print('\n')

    memory.clear_stmemory()

    if len(memory.ltmemory) >= config.MEMORY_SIZE:

        ######## RETRAINING ########
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        if iteration % 5 == 0:
            pickle.dump(
                memory,
                open(
                    run_folder + "memory/memory" +
                    str(iteration).zfill(4) + ".p", "wb"))
Example No. 10
import random
from shutil import copyfile
from importlib import reload
from game import Game, GameState
from agent import Agent
from memory import Memory
from funcs import playMatches
import loggers as lg
from settings import run_folder, run_archive_folder
import initialise
import pickle
import config

env = Game()

# If loading an existing neural network, copy the config file to root
if initialise.INITIAL_RUN_NUMBER is not None:
    copyfile(
        run_archive_folder + env.name + '/run' +
        str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py',
        './config.py')

print('\n')

######## CREATE THE PLAYERS ########

iteration = 0
while 1:
    iteration += 1
    print('ITERATION NUMBER ' + str(iteration))
    _, _, _ = playMatches(config.EPISODES,
                          lg.logger_main,
                          turns_until_tau0=config.TURNS_UNTIL_TAU0)
    print('\n')
Example No. 11
# -*- coding: utf-8 -*-
# %matplotlib inline

import numpy as np
np.set_printoptions(suppress=True)
from shutil import copyfile
import random
from importlib import reload
from game import Game, GameState
from agent import Agent
from funcs import playMatches
from settings import run_folder, run_archive_folder
import pickle
import config



print('\n')

######## CREATE THE PLAYERS ########


iteration = 0
while 1:
    iteration += 1
    print('ITERATION NUMBER ' + str(iteration))
    _, _, _ = playMatches(config.EPISODES)
    print('\n')
Example No. 12
iteration = 0

while 1:

    iteration += 1
    reload(lg)
    reload(config)
    
    print('ITERATION NUMBER ' + str(iteration))
    
    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## SELF PLAY ########
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    print('\n')
    
    memory.clear_stmemory()
    
    if len(memory.ltmemory) >= config.MEMORY_SIZE:

        ######## RETRAINING ########
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        if iteration % 5 == 0:
            pickle.dump(memory, open(run_folder + "memory/memory" + str(iteration).zfill(4) + ".p", "wb"))

        lg.logger_memory.info('====================')
Example No. 13
run_version = 1
player1version = 10
player2version = 50
EPISODES = 7
logger = loggers.logger_tourney
turns_until_tau0 = 0

env = Game()
# Each agent needs its own network object: loading player2's weights into a
# shared network would silently overwrite player1's.
network1 = ResCNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,
                  env.action_size, config.HIDDEN_CNN_LAYERS)
network1.load(env.name, run_version, player1version)
player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS,
                config.CPUCT, network1)

network2 = ResCNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,
                  env.action_size, config.HIDDEN_CNN_LAYERS)
network2.load(env.name, run_version, player2version)
player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS,
                config.CPUCT, network2)

print('Players are ready, Tourney begins!')

goes_first = 0  # 0: the starting player is chosen at random each game
scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES,
                                                logger, turns_until_tau0, None,
                                                goes_first)

print(scores)
print(points)
print(sp_scores)