Example #1
0
    reload(config)

    print('ITERATION NUMBER ' + str(iteration))

    lg.logger_main.info('BEST PLAYER VERSION: {0}'.format(best_player_version))
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## CREATE LIST OF PLAYERS #######
    # for training it is just 2 copies of the best_player vs. 2 copies of another randomly selected model
    filenames = os.listdir('run/models/')
    filenames = [name for name in filenames if '.h5' == name[-3:]]
    opponent = random.choice(filenames)
    print("Version {} randomly selected to play against version {}".format(
        int(opponent[-7:-3]), best_player_version))

    m_tmp = opponent_NN.read_specific('run/models/' + opponent)
    opponent_NN.model.set_weights(m_tmp.get_weights())

    self_play_players = []
    for i in range(PLAYER_COUNT):
        if i % 2 == 0:
            self_play_players.append(best_player)
        else:
            self_play_players.append(opponent_player)

    ######## SELF PLAY ########
    #epsilon = init_epsilon - iteration * (init_epsilon / 50.0)
    epsilon = 0

    print('Current epsilon: {}'.format(epsilon))
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
Example #2
0
def self_play_worker(conn):
    import os
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    from memory import Memory
    from settings import run_folder, run_archive_folder
    import initialise
    from game import Game, GameState
    from agent import Agent
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from shutil import copyfile
    from funcs import playMatches
    import loggers as lg
    import logging
    import random

    env = Game()

    ######## LOAD MODEL IF NECESSARY ########
    # create an untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1, ) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE),
                               config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                   shape, int(PLAYER_COUNT / TEAM_SIZE),
                                   config.HIDDEN_CNN_LAYERS)
    else:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                   shape, PLAYER_COUNT,
                                   config.HIDDEN_CNN_LAYERS)

    best_player_version = 0
    best_NN.model.set_weights(opponent_NN.model.get_weights())

    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)
    opponent_player = Agent('selected_opponent', config.MCTS_SIMS,
                            config.CPUCT, opponent_NN)

    if initialise.INITIAL_ITERATION != None:
        iteration = initialise.INITIAL_ITERATION
    else:
        iteration = 0

    memories = Memory(150 * config.EPISODES)
    while 1:
        iteration += 1

        # request best_NN weights
        conn.send(best_player_version)
        # wait indefinitely for best_NN weights
        conn.poll(None)
        data = conn.recv()
        #print('recieved: {}'.format(data))

        # if weights different set weights
        if data:
            best_NN.model.set_weights(data[1])
            best_player_version = data[0]

        if len(memories.ltmemory) != 0:  # send new memories (skip first loop)
            conn.send(memories.ltmemory)

        memories = Memory(150 * config.EPISODES)
        ######## CREATE LIST OF PLAYERS #######
        # for training it is just 2 copies of the best_player vs. 2 copies of another randomly selected model
        filenames = os.listdir('run/models/')
        filenames = [name for name in filenames if '.h5' == name[-3:]]

        if filenames:
            opponent = random.choice(filenames)
            m_tmp = opponent_NN.read_specific('run/models/' + opponent)
            opponent_NN.model.set_weights(m_tmp.get_weights())

            self_play_players = []
            for i in range(PLAYER_COUNT):
                if i % 2 == 0:
                    self_play_players.append(best_player)
                else:
                    self_play_players.append(opponent_player)
        else:
            self_play_players = []
            for i in range(PLAYER_COUNT):
                self_play_players.append(best_player)

        #print("Version {} randomly selected to play against version {}".format(int(opponent[-7:-3]), best_player_version))

        ######## SELF PLAY ########
        #epsilon = init_epsilon - iteration * (init_epsilon / 50.0)
        epsilon = 0

        #print('Current epsilon: {}'.format(epsilon))
        print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
        _, memories = playMatches(self_play_players,
                                  config.EPISODES,
                                  lg.logger_main,
                                  epsilon,
                                  memory=memories)