Example #1
    def choose_action(self, state, model):
        """Greedily pick the legal move whose resulting state the model
        values highest; returns (move, predicted value)."""
        rbs = rbg_game.resettable_bitarray_stack()
        moves = state.get_all_moves(rbs)
        boards = []

        # Encode the successor state reached by each legal move.
        for mv in moves:
            boardcp = state.copy()
            boardcp.apply_with_keeper(mv, rbs)
            boards.append(model_utilis.parse_game_state(boardcp))

        # Evaluate all candidate states in a single batched forward pass.
        values = model(torch.cat(boards, 0).to(self.device))

        max_id = torch.argmax(values)

        return moves[max_id], values[max_id]
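For context, a minimal sketch of driving this method for a single move. GreedyPlayer is assumed to be the class that defines choose_action above, and ValueNet stands in for any value network; neither name comes from the original listing.

import rbg_game

# Hypothetical setup: GreedyPlayer holds the device used in choose_action,
# ValueNet is any torch module mapping an encoded state to one value.
player = GreedyPlayer(device='cpu')
model = ValueNet()

rbs = rbg_game.resettable_bitarray_stack()
state = rbg_game.new_game_state()

# Ask the player for its best move and play it.
move, value = player.choose_action(state, model)
state.apply_with_keeper(move, rbs)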
Example #2
def test(test_config):

    rbs = rbg_game.resettable_bitarray_stack()

    model_1 = test_config['model_1']
    player_1 = test_config['player_1']

    model_2 = test_config['model_2']
    player_2 = test_config['player_2']

    num_games = test_config['num_games']

    player_1_number = 1
    player_2_number = 2

    wins = 0
    draws = 0

    for game_number in tqdm(range(num_games)):

        game_state = rbg_game.new_game_state()

        # Play until the current player is 0, which marks the end of the game.
        while game_state.get_current_player() != 0:
            if game_state.get_current_player() == player_1_number:
                move_rbs, _ = player_1.choose_action(game_state, model_1)
            else:
                move_rbs, _ = player_2.choose_action(game_state, model_2)
            make_move(game_state, move_rbs, rbs)

        # Scores are on a 0-100 scale: 100 is a win, 50 a draw.
        if game_state.get_player_score(player_1_number) == 100:
            wins += 1
        if game_state.get_player_score(player_1_number) == 50:
            draws += 1

        # Swap sides between games to remove first-move bias.
        player_2_number, player_1_number = player_1_number, player_2_number

    print('Wins=', wins)
    print('Draws=', draws)
    print('Wins+Draws=', wins + draws)
    print('Win+Draw rate=', (wins + draws) / num_games)
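The harness takes a plain dict. A sketch of one follows, assuming two greedy players like the one in Example #1; only the dict keys are taken from test() itself, the objects are assumptions.

# Hypothetical configuration; the player and model objects are assumptions.
test_config = {
    'model_1': model_a,
    'player_1': GreedyPlayer(device='cpu'),
    'model_2': model_b,
    'player_2': GreedyPlayer(device='cpu'),
    'num_games': 100,
}
test(test_config)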
Example #3
File: train.py Project: marekesz/NN
def train(train_config):

    rbs = rbg_game.resettable_bitarray_stack()

    model_1 = train_config['model_1']
    trainer_1 = train_config['trainer_1']
    player_1 = train_config['player_1']

    model_2 = train_config['model_2']
    trainer_2 = train_config['trainer_2']
    player_2 = train_config['player_2']

    model_name = train_config['model_name']
    num_games = train_config['num_games']
    save_model_every_n_iterations = train_config[
        'save_model_every_n_iterations']
    save_path = train_config['save_path']

    player_1_number = 1
    player_2_number = 2

    for game_number in tqdm(range(num_games)):

        game_state = rbg_game.new_game_state()

        # Play until the current player is 0, which marks the end of the game.
        while game_state.get_current_player() != 0:

            old_state = game_state.copy()

            if game_state.get_current_player() == player_1_number:
                move_rbs, move_value = player_1.choose_action(
                    game_state, model_1)
                make_move(game_state, move_rbs, rbs)

                # On the final move, record the transition together with the
                # terminal reward (score normalized from 0-100 to 0-1).
                if game_state.get_current_player() == 0:
                    trainer_1.add_to_history(
                        replay.MovePerformed(
                            old_state, move_rbs, game_state.copy(), move_value,
                            game_state.get_player_score(player_1_number) /
                            100))
                else:
                    trainer_1.add_to_history(
                        replay.MovePerformed(old_state, move_rbs,
                                             game_state.copy(), move_value))
            else:
                move_rbs, move_value = player_2.choose_action(
                    game_state, model_2)
                make_move(game_state, move_rbs, rbs)

                if game_state.get_current_player() == 0:
                    trainer_2.add_to_history(
                        replay.MovePerformed(
                            old_state, move_rbs, game_state.copy(), move_value,
                            game_state.get_player_score(player_2_number) /
                            100))
                else:
                    trainer_2.add_to_history(
                        replay.MovePerformed(old_state, move_rbs,
                                             game_state.copy(), move_value))

        # Both trainers learn from the finished game, each from its own
        # normalized final score.
        trainer_1.learn(game_state.get_player_score(player_1_number) / 100)
        trainer_2.learn(game_state.get_player_score(player_2_number) / 100)

        # Swap sides between games to remove first-move bias.
        player_2_number, player_1_number = player_1_number, player_2_number

        if game_number % save_model_every_n_iterations == 0:
            save_model(save_path, model_name + '_model_1', model_1)
            save_model(save_path, model_name + '_model_2', model_2)
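train() reads the same kind of dict as test(). A sketch with placeholder values, where only the keys are taken from the function above and every object is an assumption:

# Hypothetical configuration; models, trainers and players are assumptions.
train_config = {
    'model_1': model_a, 'trainer_1': trainer_a, 'player_1': player_a,
    'model_2': model_b, 'trainer_2': trainer_b, 'player_2': player_b,
    'model_name': 'my_game',
    'num_games': 10000,
    'save_model_every_n_iterations': 100,
    'save_path': './checkpoints',
}
train(train_config)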
Example #4
import rbg_game
import random
import time

bitarraystack = rbg_game.resettable_bitarray_stack()


# Visits the whole game tree of the given state down to the given depth.
# Returns a pair: [number of leaves visited, number of nodes visited].
def perft(state, depth):
    if not state:
        return [0, 0]
    if depth == 0:
        # A state at the depth limit counts as both a leaf and a node.
        return [1, 1]
    moves = state.get_all_moves(bitarraystack)
    result = [0, 0]
    result[1] += 1  # count the current (internal) node
    for move in moves:
        next_state = state.copy()
        next_state.apply_with_keeper(move, bitarraystack)
        rec_result = perft(next_state, depth - 1)
        result[0] += rec_result[0]
        result[1] += rec_result[1]
    return result
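A short driver for perft from the initial position. The API calls all appear in this listing; the example depth and the timing are additions.

# Measure perft from the initial position (depth 4 is an arbitrary example).
state = rbg_game.new_game_state()
depth = 4

start = time.time()
leaves, nodes = perft(state, depth)
elapsed = time.time() - start

print('depth', depth, 'leaves', leaves, 'nodes', nodes)
print('leaves per second:', leaves / elapsed)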


# Play the game till the end from the current state. This modifies the
# given state. (Truncated in the source; completed below with uniformly
# random moves, an assumption based only on the API shown above.)
def playout(state):
    nodes = 0
    while state.get_current_player() != 0:  # player 0 to move = game over
        state.apply_with_keeper(
            random.choice(state.get_all_moves(bitarraystack)), bitarraystack)
        nodes += 1
    return nodes
Example #5
    def choose_action(self, game_state):
        """Baseline player: pick a uniformly random legal move.
        No value estimate is returned."""
        rbs = rbg_game.resettable_bitarray_stack()
        move = random.choice(game_state.get_all_moves(rbs))

        return (move, None)
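This random player makes a handy baseline. A sketch of one complete random game with it, where RandomPlayer is assumed to be the class defining the method above:

import rbg_game

player = RandomPlayer()  # assumed wrapper class for choose_action above
rbs = rbg_game.resettable_bitarray_stack()
state = rbg_game.new_game_state()

# Play random moves until player 0 (game over) is to move.
while state.get_current_player() != 0:
    move, _ = player.choose_action(state)
    state.apply_with_keeper(move, rbs)

print('final score for player 1:', state.get_player_score(1))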