Beispiel #1
0
    def handle_get_action(self, state):
        """
        Choose this actor's next move by querying the loaded network.

        The flat state is split into the player number (first entry) and the
        board cells. The cells are reshaped into a square grid, converted with
        ``MCTS.get_feature`` and passed to ``ANET.predict`` together with
        ``self.model``. When playing as player 2 the two predicted coordinates
        are swapped before use. If the resulting move is not an empty cell, a
        random empty cell is returned instead.

        :param state: The current board in the form (1 or 2, 0, 0, ..., 0), where the
        leading 1 or 2 is the number of the current player and the remaining entries
        are the board cells.
        :return: The selected action as a tuple (row, column)
        """
        flat_state = list(state)
        player, cells = flat_state[0], flat_state[1:]
        side = int(math.sqrt(len(cells)))

        # Flat cell list --> square grid, e.g. [1, 2, 3, 4] --> [[1, 2], [3, 4]]
        board = np.reshape(cells, (side, side))

        # Grid --> network input; the flag tells get_feature we are player 2.
        features = MCTS.get_feature(board, player == 2)
        predicted = ANET.predict(features, self.model)

        # Every cell holding 0 is free and therefore a legal move.
        free_cells = [(row, col)
                      for row in range(len(board))
                      for col in range(len(board))
                      if board[row][col] == 0]

        if player == 2:
            predicted = (predicted[1], predicted[0])

        if predicted in free_cells:
            return predicted
        # Fall back to a random free cell when the prediction is not playable.
        return choice(free_cells)
 def fetch_game_models(self, dir_path="anet"):
     """Load every ``.h5`` file in ``dir_path`` into ``self.models``.

     Each model is stored under the key that ``self.filename_to_step``
     returns for its filename; files with any other suffix are ignored.

     :param dir_path: Directory that is listed for model files.
     """
     h5_names = [name for name in os.listdir(dir_path) if name.endswith(".h5")]
     for name in h5_names:
         loaded = ANET.new_from_file(dir_path + "/" + name)
         step = self.filename_to_step(name)
         self.models[step] = loaded
    def run_tournament(self, path="topp"):
        """
        Play all pairings returned by ``self.get_games()`` and print the outcome.

        Models are first loaded from ``path`` via ``self.fetch_game_models``.
        Every pairing ``(p1, p2)`` plays ``self.num_games`` games on a
        ``Hex(5, 1)`` board; ``p1``'s model is used when ``Hex.PLAYER_TOP`` is
        to move and ``p2``'s otherwise. When ``self.random`` is truthy, the
        ``Hex.PLAYER_LEFT`` side plays random legal moves instead of
        consulting its model. A predicted move that is not currently legal is
        counted as a wrong move and replaced by a random legal one. Wins are
        tallied in ``self.results`` under the key ``(winner, loser)``, and the
        tally plus the share of wrong moves (in percent) is pretty-printed.

        :param path: Directory passed on to ``self.fetch_game_models``.
        :raises RuntimeError: If a finished game reports no winner, or reports
            both players as winners.
        """
        self.fetch_game_models(path)
        wrong_moves = 0
        total_moves = 0
        for p1, p2 in self.get_games():
            for _ in range(self.num_games):
                game = Hex(5, 1)
                while len(game.get_moves()) > 0:
                    if self.random and game.player == Hex.PLAYER_LEFT:
                        next_move = choice(game.get_moves())
                    else:
                        model = self.models[
                            p1 if game.player == Hex.PLAYER_TOP else p2]
                        is_left = game.player == Hex.PLAYER_LEFT
                        next_move = ANET.predict(
                            get_feature(game.get_state(), is_left),
                            model=model)
                        # The left player's prediction is used with its two
                        # coordinates swapped.
                        if is_left:
                            next_move = (next_move[1], next_move[0])
                        if next_move not in game.get_moves():
                            wrong_moves += 1
                            next_move = choice(game.get_moves())
                    total_moves += 1
                    game.do_move(next_move)

                res_p1 = game.get_result(Hex.PLAYER_TOP)
                res_p2 = game.get_result(Hex.PLAYER_LEFT)
                if res_p1 == 0 and res_p2 == 0:
                    raise RuntimeError(
                        "Game between {} and {} finished without a winner".format(
                            p1, p2))
                if res_p1 == 1 and res_p2 == 1:
                    raise RuntimeError(
                        "Game between {} and {} reported two winners".format(
                            p1, p2))

                # Derive winner and loser from one condition so the key can
                # never name the same model twice.
                if res_p1 == 1:
                    winning_model, losing_model = p1, p2
                else:
                    winning_model, losing_model = p2, p1
                pairing = (winning_model, losing_model)
                self.results[pairing] = self.results.get(pairing, 0) + 1

        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(self.results)
        if total_moves > 0:
            # Scale to an actual percentage to match the "(%)" label.
            pp.pprint("Wrong moves (%): " +
                      str(100 * wrong_moves / total_moves))
Beispiel #4
0
# from topp import TOPP
# topp = TOPP(verbose=True)
# topp.start()

from anet import ANET
from utils import Timer
import numpy as np

# Time a single `predict` call of an ANET(5) instance on one stored example.
timer = Timer()
nn = ANET(5)
# NOTE(review): hard-coded, machine-specific absolute path. allow_pickle=True
# makes np.load unpickle object arrays, which is only safe for trusted files.
rbuf = np.load('/home/kim/skule/aiprog/project3/rbufs/size_5-ep_180.h5.npy',
               allow_pickle=True)
# presumably each entry is an (input, target) pair, so this takes the input of
# the first stored case — TODO confirm against the code that writes the buffer
test_ex = rbuf[0][0]
# Wrap the single example in an outer list so the array has a leading axis of 1.
to_predict = np.array([list(test_ex)])

# Only the predict call itself is inside the timed section.
timer.start("prediction")
prediction = nn.model.predict(to_predict)
timer.stop()

# import tensorflow as tf
# from tensorflow import keras
# import numpy as np
# import random
# import os

# def softmax(x):
#     """Compute softmax values for each sets of scores in x."""
#     e_x = np.exp(x - np.max(x))
#     return e_x / e_x.sum()

# # model = keras.models.load_model("./models/size_5-ep_180.h5")
Beispiel #5
0
 def __init__(self, IP_address=None, verbose=True):
     """Initialise the client actor and load an ANET model into ``self.model``.

     :param IP_address: Forwarded unchanged to ``BasicClientActorAbs.__init__``.
     :param verbose: Forwarded unchanged to ``BasicClientActorAbs.__init__``.
     """
     # Set before the base-class initialiser runs; keep this order.
     self.series_id = -1
     BasicClientActorAbs.__init__(self, IP_address, verbose=verbose)
     # NOTE(review): the model path is hard-coded and relative to the current
     # working directory.
     self.model = ANET.new_from_file("../short_topp/model_step_10.h5")
Beispiel #6
0
from state_manager import StateManager
from mcts import MCTS
from tree import Tree
import numpy as np
import collections


def train_anet(anet, RBUF):
    """Train ``anet`` once on a random minibatch drawn from ``RBUF``.

    The minibatch is sampled without replacement and holds at most 32 cases
    (all of them when the buffer contains fewer).
    """
    buffered = len(RBUF)
    sample_count = buffered if buffered < 32 else 32
    anet.train(random.sample(RBUF, sample_count))


""" Initializations """
# NOTE(review): board_size, rbuf_max_size, offset, num_of_games and
# num_search_games are not defined in the visible part of this script —
# presumably configured earlier; confirm.
anet = ANET(size=board_size)
agent = MCTS(exploration_rate=1, anet=anet)
sm = StateManager()
game = sm.create_game()
tree = Tree(game)
win_stats = []
# Buffer of training cases; a deque with maxlen drops its oldest entry once full.
RBUF = collections.deque(maxlen=rbuf_max_size)

# One iteration per episode, numbered from `offset` up to and including
# num_of_games.
for i in range(offset, num_of_games + 1):
    print("Episode: {}/{}".format(i, num_of_games))
    state = tree.root

    # Until the state manager reports the game as over: run a UCT search from
    # `state` and store the training case it returns.
    # NOTE(review): nothing visible here plays a move or updates `state`; the
    # rest of the loop body appears to be missing from this excerpt.
    while (not sm.is_game_over()):
        best_child, training_case = agent.uct_search(tree, state,
                                                     num_search_games)
        RBUF.append(training_case)
def main():
    """Train an ANET through self-play on Hex and optionally run a tournament.

    With the settings hard-coded below, the function:

    * saves the untrained network as ``model_step_0.h5`` in the save directory,
    * plays 200 games on a 5x5 board, choosing every move with ``MonteCarlo``
      guided by the network and pushing one training case per move into the
      replay buffer,
    * retrains the network (and evaluates it against a random player) every
      40 moves,
    * saves a checkpoint and grows the replay buffer by 20 every 10 games,
    * finally runs a ``Tournament`` over the saved checkpoints, or, when the
      tournament is switched off, saves the network to
      ``best_topp/model_2.h5``.
    """
    # n, num_games, verbose, starting_player, max_rollouts = setup_game()
    board_size = 5
    total_games = 200
    verbose = False  # unused here; mirrors the value setup_game() would return
    first_player = 1
    rollout_budget = 0.5
    winners = []
    viewer = None

    run_tournament = True
    with_training = True
    num_games_tournament = 25
    save_path = "short_topp" if run_tournament else "long_topp"

    ##### CONFIG #####

    buffer_size = 40
    train_interval = 40
    saving_interval = 10
    epochs = 300

    ##################

    buffer = ReplayBuffer(vfrac=0.1, tfrac=0.1, size=buffer_size)
    anet = init_anet(board_size, buffer)

    def policy(features):
        # Looks up anet.model on every call, so retraining is picked up.
        return ANET.predict(features, model=anet.model)

    if with_training:
        anet.save_to_file(save_path + "/model_step_{0}.h5".format(0))
    game = Hex(board_size, first_player)
    root_node = Node(game=game)

    moves_done = 0
    episodes = range(1, total_games + 1) if with_training else range(0)
    for game_num in episodes:
        game = Hex(board_size, first_player)
        current = root_node
        # viewer = Board(game)
        print("Game number {}".format(game_num))
        while game.get_moves():
            search = MonteCarlo(game, rollout_budget, current)
            search.run(policy)
            buffer.push(search.get_training_case())
            current = search.get_best_move()
            game.do_move(current.move)
            moves_done += 1

            if viewer:
                viewer.do_move(current.move, game.player)
            if moves_done % train_interval == 0:
                buffer.update()
                anet.train_model(epochs)
                anet.run_against_random(num_games=50, game_num=game_num)

        checkpoint_due = saving_interval > 0 and game_num % saving_interval == 0
        if checkpoint_due:
            anet.save_to_file(save_path +
                              "/model_step_{0}.h5".format(game_num))
            buffer.size += 20
            # train_interval += 5
            # anet.optimizer.lr /= 2
        if game.get_result(game.player) == 1:
            winners.append(game.player)

    if viewer:
        viewer.persist()

    if not run_tournament:
        anet.save_to_file("best_topp/model_2.h5")
        return

    Tournament(num_games_tournament).run_tournament(save_path)
def init_anet(size, buffer):
    """Create the ANET used for training.

    The network is built for ``size`` with ``buffer`` as its replay buffer, a
    batch size of 20 and an Adagrad optimizer constructed with 0.005.
    """
    adagrad = optimizers.Adagrad(0.005)
    settings = {
        "size": size,
        "buffer": buffer,
        "batch_size": 20,
        "optimizer": adagrad,
    }
    return ANET(**settings)
Beispiel #9
0
    minibatch = random.sample(RBUF, k=batch_size)
    x_train, y_train = zip(*minibatch)
    anet.train(np.asarray(x_train), np.asarray(y_train))


def get_distribution(node):
    """Return the normalized visit counts of ``node``'s children.

    A zero vector with ``node.game.boardsize ** 2`` entries is filled with
    each child's ``visits`` at the index given by its ``move``. The raw vector
    is printed and then passed through ``normalize``.
    """
    visit_counts = np.zeros(node.game.boardsize ** 2)
    for successor in node.children:
        visit_counts[successor.move] = successor.visits
    print(visit_counts)
    return normalize(visit_counts)


""" Initializations """
# NOTE(review): boardsize is not defined in the visible part of this script —
# presumably configured earlier; confirm.
anet = ANET(boardsize)
agent = MCTS(exploration_rate=1, anet=anet)
sm = StateManager(boardsize)
game = sm.create_game()
tree = Tree(game, 1.0)
win_stats = []

# TODO: Save interval for ANET parameters
# Buffer capped at 500 entries; the deque discards its oldest entry when full.
RBUF = collections.deque(maxlen=500)

for i in range(1, num_of_games + 1):
    progress_bar(i + 1)
    state = tree.root

    while (not sm.is_finished()):
        player = sm.game.get_current_player()