def _selfplay(n):
    chessenv = Game()
    memory = Memory(config.MEMORY_SIZE)

    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                              (119,) + chessenv.grid_shape, chessenv.action_size,
                              config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           (119,) + chessenv.grid_shape, chessenv.action_size,
                           config.HIDDEN_CNN_LAYERS)
    best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', chessenv.state_size, chessenv.action_size,
                           config.MCTS_SIMS, config.CPUCT, current_NN)
    best_player = Agent('best_player', chessenv.state_size, chessenv.action_size,
                        config.MCTS_SIMS, config.CPUCT, best_NN)

    t0 = time.perf_counter()
    print('Proc {0} start'.format(n))
    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    t1 = time.perf_counter() - t0
    print('Proc {0} done in {1} seconds'.format(n, t1))
    return memory
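# A minimal sketch (not from the original source) of fanning _selfplay out
# across worker processes with multiprocessing.Pool and merging the returned
# memories. It assumes Memory exposes the `ltmemory` container used by the
# training loops below; adjust if the real Memory API differs.
from multiprocessing import Pool

def parallel_selfplay(n_procs=4):
    with Pool(processes=n_procs) as pool:
        partial_memories = pool.map(_selfplay, range(n_procs))
    merged = Memory(config.MEMORY_SIZE)
    for mem in partial_memories:
        merged.ltmemory.extend(mem.ltmemory)  # results come back pickled per process
    return merged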
def play_one_round():
    global firstRun
    global player1
    global player2

    go_first = input('Do you want to go first (y/n)?')
    if go_first.lower() == "y":
        isFirst = 1
    else:
        isFirst = -1

    player1_version = -1
    player2_version = ai_player_version

    if firstRun:
        firstRun = False
        _, _, _, _, player1, player2 = playMatchesBetweenVersions(
            env, run_version, player1_version, player2_version,
            episodes, lg.logger_play_game, 0, isFirst)
    else:
        playMatches(player1, player2, episodes, lg.logger_play_game,
                    0, None, isFirst)
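# Hypothetical driver for play_one_round, assuming the remaining module
# globals it reads (env, run_version, episodes, ai_player_version) are
# already configured as in the surrounding scripts.
firstRun = True
player1 = None
player2 = None

while input('Play a round (y/n)? ').lower() == 'y':
    play_one_round()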
while 1:
    iteration += 1
    reload(lg)
    reload(config)

    print('ITERATION NUMBER ' + str(iteration))
    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    # SELF PLAY
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    print('\n')

    memory.clear_stmemory()

    if len(memory.ltmemory) >= config.MEMORY_SIZE:
        # RETRAINING
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        if iteration % 5 == 0:
            # completed from the identical checkpoint in do_train below
            pickle.dump(memory, open(run_folder + "memory/memory"
                                     + str(iteration).zfill(4) + ".p", "wb"))
else:
    m_tmp = player1_NN.read(initialise.INITIAL_RUN_NUMBER, player_idx + 1)
    player1_NN.model.set_weights(m_tmp.get_weights())
    player1 = Agent('player1', env.state_size, env.action_size,
                    config.MCTS_SIMS, config.CPUCT, player1_NN)

    m_tmp = player2_NN.read(initialise.INITIAL_RUN_NUMBER, opponent_idx + 1)
    player2_NN.model.set_weights(m_tmp.get_weights())
    player2 = Agent('player2', env.state_size, env.action_size,
                    config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = funcs.playMatches(
        player1, player2, EPISODES, lg.logger_main,
        turns_until_tau0=0, goes_first=1)

    print('\n')
    print('-------')
    print('player1: version {}'.format(player_idx))
    print('player2: version {}'.format(opponent_idx))
    print('\nSCORES')
    print(scores)
    print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
    print(sp_scores)
    print(points)

    points_dict[player_idx] += sum(points[player1.name])
    points_dict[opponent_idx] += sum(points[player2.name])

    plt.figure()
def evaluation_worker(conn):
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    import initialise
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from game import Game
    from agent import Agent
    from memory import Memory
    from funcs import playMatches
    from settings import run_folder  # needed for the checkpoint path below
    import loggers as lg
    import logging
    import pickle  # needed for pickle.dump below
    import time

    # initialise new test memory
    test_memories = Memory(int(MEMORY_SIZE / 10))

    env = Game()

    # initialise new models
    # create untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1,) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                                  int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    else:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                                  PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    current_player_version = 0
    best_player_version = 0

    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION is not None:
        best_player_version = initialise.INITIAL_MODEL_VERSION
        # print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
        m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                             initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())
        best_NN.model.set_weights(m_tmp.get_weights())
    # otherwise just ensure the weights on the two players are the same
    else:
        best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', config.MCTS_SIMS, config.CPUCT, current_NN)
    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)

    time.sleep(20)

    while 1:
        # request current_NN weights
        conn.send(current_player_version)
        # wait indefinitely for current_NN weights
        conn.poll(None)
        data = conn.recv()

        if data:
            # set current_NN weights
            current_NN.model.set_weights(data)
            current_player_version += 1

            # play tournament games
            tourney_players = []
            if TEAM_SIZE > 1:
                for i in range(int(PLAYER_COUNT / TEAM_SIZE)):  # for each team
                    # alternate best_players and current_players up to TEAM_SIZE
                    for k in range(TEAM_SIZE):
                        if k % 2 == 0:
                            tourney_players.append(best_player)
                        else:
                            tourney_players.append(current_player)
            else:
                for i in range(PLAYER_COUNT):
                    if i % 2 == 0:
                        tourney_players.append(best_player)
                    else:
                        tourney_players.append(current_player)

            scores, test_memories = playMatches(tourney_players, config.EVAL_EPISODES,
                                                lg.logger_tourney, 0.0, test_memories,
                                                evaluation=True)
            test_memories.clear_stmemory()

            # if the current player is significantly better than the best_player,
            # replace the best player; the replacement is made by copying the
            # weights of current_player's network to best_player's network
            if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
                # if current_NN won, send message
                conn.send(((current_player_version, best_player_version), str(scores)))
                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)

            if (len(test_memories.ltmemory) == test_memories.MEMORY_SIZE
                    and current_player_version % 5 == 0):
                # was `memories`, which is undefined in this worker
                pickle.dump(test_memories,
                            open(run_folder + "memory/test_memory"
                                 + str(current_player_version).zfill(4) + ".p", "wb"))

            # print("Evaluating performance of current_NN")
            # current_player.evaluate_accuracy(test_memories.ltmemory)
            # print('\n')
        else:
            time.sleep(10)
bestPlayer = Agent(bestNN)

os.makedirs(os.path.dirname('../memory/version0.p'), exist_ok=True)

if initialMemory is None:
    memory = Memory()
else:
    print(f'Loading memory version {initialMemory}...')
    memory = pickle.load(open(f'../memory/version{initialMemory}.p', "rb"))

iteration = 0
while True:
    iteration += 1
    print(f'Iteration {iteration}:')

    print("Playing matches...")
    playMatches(bestPlayer, bestPlayer, config.EPISODES,
                config.TURNS_UNTIL_TAU0, memory)
    pickle.dump(memory, open(f'../memory/version{iteration}.p', "wb"))

    if len(memory.longTerm) >= config.MEMORY_SIZE:
        print("Retraining...")
        currentPlayer.replay(memory.longTerm)

        print("Tournament...")
        scores = playMatches(currentPlayer, bestPlayer,
                             config.EVAL_EPISODES, 0, memory)
        print("Scores:")
        print(scores)

        if scores["player1"] > scores["player2"] * config.SCORING_THRESHOLD:
            bestPlayerVersion += 1
            bestNN.model.set_weights(currentNN.model.get_weights())
            bestNN.write(f'../models/version{bestPlayerVersion}.h5')
def do_train(iteration, current_player, best_player, best_player_version,
             current_NN, best_NN, memory):
    while 1:
        iteration += 1
        reload(lg)
        reload(config)

        print('ITERATION NUMBER ' + str(iteration))
        lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
        print('BEST PLAYER VERSION ' + str(best_player_version))

        ######## SELF PLAY ########
        print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
        _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                                      lg.logger_main,
                                      turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                      memory=memory)
        print('\n')

        memory.clear_stmemory()

        if len(memory.ltmemory) >= config.MEMORY_SIZE:
            ######## RETRAINING ########
            print('RETRAINING...')
            current_player.replay(memory.ltmemory)
            print('')

            if iteration % 5 == 0:
                pickle.dump(memory, open(run_folder + "memory/memory"
                                         + str(iteration).zfill(4) + ".p", "wb"))

            lg.logger_memory.info('====================')
            lg.logger_memory.info('NEW MEMORIES')
            lg.logger_memory.info('====================')

            memory_samp = random.sample(memory.ltmemory, min(1000, len(memory.ltmemory)))

            for s in memory_samp:
                current_value, current_probs, _ = current_player.get_preds(s['state'])
                best_value, best_probs, _ = best_player.get_preds(s['state'])

                lg.logger_memory.info('MCTS VALUE FOR %s: %f', s['playerTurn'], s['value'])
                lg.logger_memory.info('CUR PRED VALUE FOR %s: %f', s['playerTurn'], current_value)
                lg.logger_memory.info('BES PRED VALUE FOR %s: %f', s['playerTurn'], best_value)
                lg.logger_memory.info('THE MCTS ACTION VALUES: %s',
                                      ['%.2f' % elem for elem in s['AV']])
                lg.logger_memory.info('CUR PRED ACTION VALUES: %s',
                                      ['%.2f' % elem for elem in current_probs])
                lg.logger_memory.info('BES PRED ACTION VALUES: %s',
                                      ['%.2f' % elem for elem in best_probs])
                lg.logger_memory.info('ID: %s', s['state'].id)
                lg.logger_memory.info('INPUT TO MODEL: %s',
                                      current_player.model.convertToModelInput(s['state']))

                s['state'].render(lg.logger_memory)

            ######## TOURNAMENT ########
            print('TOURNAMENT...')
            scores, _, points, sp_scores = playMatches(best_player, current_player,
                                                       config.EVAL_EPISODES,
                                                       lg.logger_tourney,
                                                       turns_until_tau0=0, memory=None)
            print('\nSCORES')
            print(scores)
            print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
            print(sp_scores)
            # print(points)
            print('\n\n')

            if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)
        else:
            print('MEMORY SIZE: ' + str(len(memory.ltmemory)))
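# Hypothetical entry point for do_train, assuming the players, networks, and
# starting version are built as in the surrounding snippets (all names here
# are assumptions, not part of the original source).
if __name__ == '__main__':
    memory = Memory(config.MEMORY_SIZE)
    do_train(0, current_player, best_player, best_player_version,
             current_NN, best_NN, memory)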
def self_play_worker(conn):
    import os
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    from memory import Memory
    from settings import run_folder, run_archive_folder
    import initialise
    from game import Game, GameState
    from agent import Agent
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from shutil import copyfile
    from funcs import playMatches
    import loggers as lg
    import logging
    import random

    env = Game()

    ######## LOAD MODEL IF NECESSARY ########
    # create untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1,) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                                   int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    else:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                                   PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    best_player_version = 0
    best_NN.model.set_weights(opponent_NN.model.get_weights())

    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)
    opponent_player = Agent('selected_opponent', config.MCTS_SIMS, config.CPUCT, opponent_NN)

    if initialise.INITIAL_ITERATION is not None:
        iteration = initialise.INITIAL_ITERATION
    else:
        iteration = 0

    memories = Memory(150 * config.EPISODES)

    while 1:
        iteration += 1

        # request best_NN weights
        conn.send(best_player_version)
        # wait indefinitely for best_NN weights
        conn.poll(None)
        data = conn.recv()
        # print('received: {}'.format(data))

        # if the weights differ, set weights
        if data:
            best_NN.model.set_weights(data[1])
            best_player_version = data[0]

        if len(memories.ltmemory) != 0:
            # send new memories (skip first loop)
            conn.send(memories.ltmemory)
            memories = Memory(150 * config.EPISODES)

        ######## CREATE LIST OF PLAYERS ########
        # for training it is just two copies of best_player vs. two copies of
        # another randomly selected model
        filenames = os.listdir('run/models/')
        filenames = [name for name in filenames if name[-3:] == '.h5']

        if filenames:
            opponent = random.choice(filenames)
            m_tmp = opponent_NN.read_specific('run/models/' + opponent)
            opponent_NN.model.set_weights(m_tmp.get_weights())

            self_play_players = []
            for i in range(PLAYER_COUNT):
                if i % 2 == 0:
                    self_play_players.append(best_player)
                else:
                    self_play_players.append(opponent_player)
        else:
            self_play_players = []
            for i in range(PLAYER_COUNT):
                self_play_players.append(best_player)

        # print("Version {} randomly selected to play against version {}".format(
        #     int(opponent[-7:-3]), best_player_version))

        ######## SELF PLAY ########
        # epsilon = init_epsilon - iteration * (init_epsilon / 50.0)
        epsilon = 0
        # print('Current epsilon: {}'.format(epsilon))
        print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
        _, memories = playMatches(self_play_players, config.EPISODES,
                                  lg.logger_main, epsilon, memory=memories)
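# Hedged sketch (not from the original source) of the trainer side of the
# self_play_worker pipe protocol, inferred from the worker loop above: reply
# with a (version, weights) tuple when the worker is stale, otherwise a falsy
# value, then collect any memories the worker ships back. `trainer_version`
# and `best_NN` stand in for the trainer's own state and are assumptions.
from multiprocessing import Pipe, Process

if __name__ == '__main__':
    parent_conn, child_conn = Pipe()
    Process(target=self_play_worker, args=(child_conn,), daemon=True).start()
    while True:
        worker_version = parent_conn.recv()    # worker announces its version
        if worker_version < trainer_version:
            parent_conn.send((trainer_version, best_NN.model.get_weights()))
        else:
            parent_conn.send(None)
        if parent_conn.poll(1):                # worker may ship new memories
            new_memories = parent_conn.recv()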
iteration += 1
# reload(lg)
# reload(config)

print('ITERATION NUMBER ' + str(iteration))
# lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
print('BEST PLAYER VERSION ' + str(best_player_version))

######## SELF PLAY ########
print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
_, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, None,
                              turns_until_tau0=config.TURNS_UNTIL_TAU0,
                              memory=memory, board_size=config.BOARD_SIZE)
print('\n')

memory.clear_stmemory()

if len(memory.ltmemory) >= config.MEMORY_SIZE:
    ######## RETRAINING ########
    print('RETRAINING...')
    current_player.replay(memory.ltmemory)
    print('')

    if iteration % 5 == 0:
        # completed from the identical checkpoint in do_train above
        pickle.dump(memory, open(run_folder + "memory/memory"
                                 + str(iteration).zfill(4) + ".p", "wb"))
import random
from importlib import reload
from shutil import copyfile  # needed for the config copy below

from game import Game, GameState
from agent import Agent
from memory import Memory
from funcs import playMatches
import loggers as lg
from settings import run_folder, run_archive_folder
import initialise
import pickle
import config

env = Game()  # needed for env.name in the archive path below

# If loading an existing neural network, copy the config file to root
if initialise.INITIAL_RUN_NUMBER is not None:
    copyfile(run_archive_folder + env.name + '/run'
             + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py',
             './config.py')

print('\n')

######## CREATE THE PLAYERS ########
iteration = 0

while 1:
    iteration += 1
    print('ITERATION NUMBER ' + str(iteration))
    _, _, _ = playMatches(config.EPISODES, lg.logger_main,
                          turns_until_tau0=config.TURNS_UNTIL_TAU0)
    print('\n')
# -*- coding: utf-8 -*-
# %matplotlib inline
import numpy as np
np.set_printoptions(suppress=True)

from shutil import copyfile
import random
from importlib import reload

from game import Game, GameState
from agent import Agent
from funcs import playMatches
from settings import run_folder, run_archive_folder
import pickle
import config

print('\n')

######## CREATE THE PLAYERS ########
iteration = 0

while 1:
    iteration += 1
    print('ITERATION NUMBER ' + str(iteration))
    _, _, _ = playMatches(config.EPISODES)
    print('\n')
iteration = 0

while 1:
    iteration += 1
    reload(lg)
    reload(config)

    print('ITERATION NUMBER ' + str(iteration))
    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## SELF PLAY ########
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    print('\n')

    memory.clear_stmemory()

    if len(memory.ltmemory) >= config.MEMORY_SIZE:
        ######## RETRAINING ########
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        if iteration % 5 == 0:
            # str() cast fixes a TypeError: the original concatenated an int
            pickle.dump(memory, open(run_folder + "memory/memory"
                                     + str(iteration) + ".p", "wb"))

        lg.logger_memory.info('====================')
run_version = 1
player1version = 10
player2version = 50
EPISODES = 7
logger = loggers.logger_tourney
turns_until_tau0 = 0

env = Game()

# Use two separate networks: loading player2's weights into the same network
# object would silently leave both agents playing with player2's weights.
network1 = ResCNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,
                  env.action_size, config.HIDDEN_CNN_LAYERS)
network1.load(env.name, run_version, player1version)
player1 = Agent('player1', env.state_size, env.action_size,
                config.MCTS_SIMS, config.CPUCT, network1)

network2 = ResCNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,
                  env.action_size, config.HIDDEN_CNN_LAYERS)
network2.load(env.name, run_version, player2version)
player2 = Agent('player2', env.state_size, env.action_size,
                config.MCTS_SIMS, config.CPUCT, network2)

print('Players are ready, Tourney begins!')

goes_first = 0
scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES,
                                                logger, turns_until_tau0,
                                                None, goes_first)

print(scores)
print(points)
print(sp_scores)