Example #1
    def ai_play(self):
        root = self.current_state
        while not self.stateman.is_terminal(self.current_state.grid,
                                            self.current_player * -1):
            mcts_runner = Mcts(self.stateman, self.current_state,
                               self.simulations)
            self.train_x.append(self.current_state.grid)

            new_state, train_y = mcts_runner.run()

            self.train_y.append(train_y)

            print(new_state.action)
            #self.n_x_n[new_state.action] = self.current_player
            if self.visualization:
                self.board.auto_place_tile(new_state.action,
                                           self.current_player)
            self.current_player *= -1
            self.current_state = new_state

        for i in range(len(self.train_x)):
            print(str(self.train_x[i]) + " = " + str(self.train_y[i]))

        #model = hex_neural_network(np.array(self.train_x), np.array(self.train_y), len(self.n_x_n))
        #model.train()

        if self.plotter:
            tree_plot(root)
Example #2
def main():
    config = Config("configs/config.txt")
    state_manager = init_state_manager(config)
    actor = init_actor(config, state_manager.get_vec_to_action_map(),
                       state_manager.get_illegal_action_pruner)
    mc = Mcts(state_manager, actor)

    player_1_wins = 0
    # play game G times
    for game_nr in range(config.G):
        state_manager.current_player = choose_start_player(config.start_player)
        print("--> playig game nr: {} with start_player {}".format(
            game_nr + 1, state_manager.current_player))
        state_manager.print_start_board(config.verbose)
        mc.tree_search(config.m)
        if state_manager.has_won(1):
            player_1_wins += 1
            print("----> player 1 wins")
        else:
            print("----> player 2 wins")

        state_manager.reset_state()

    win_percentage = (player_1_wins / config.G) * 100
    print("player 1 wins: {} of {} games ({}%)".format(player_1_wins, config.G,
                                                       win_percentage))
Example #3
import random
import time


def one_run(env, n_turns, steepness, noise):

    env.max_turns = n_turns
    env.steepness = steepness
    env.noise_factor = noise

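    # scale the trial count inversely with episode length so the total
    # number of environment steps stays roughly constant (20 * 400)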
    trials = int(20 * 400 / n_turns)

    t = time.time()
    metrics_mcts_v3 = []
    for i in range(trials):
        env.reset()
        m = Metric('step', 'score')
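        # Node(0, 10) presumably bounds the continuous action range [0, 10],
        # matching the random.random() * 10 baseline further down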
        root = Node(0, 10)
        mcts = Mcts(root)

        done = False
        while not done:
            action = mcts.decide()
            _, r, done, _ = env.step(action)
            mcts.register(r)

        for j, r in enumerate(root.results):
            m.add_record(j, r)

        metrics_mcts_v3.append(m)

    metrics_mcts_v3 = sum(metrics_mcts_v3)
    print('Time for MCTSv3:', time.time() - t)

    t = time.time()
    metrics_rnd = []
    for i in range(trials):

        env.reset()
        m = Metric('step', 'score')
        rand_results = []
        done = False
        while not done:
            action = random.random() * 10
            _, r, done, _ = env.step(action)
            rand_results.append(r)

        for j, r in enumerate(rand_results):
            m.add_record(j, r)

        metrics_rnd.append(m)

    print('Time for RND:', time.time() - t)

    plot_group(
        {
            'mcts_v3': metrics_mcts_v3,
            'random': sum(metrics_rnd)
        },
        'temp',
        name=f'{n_turns}_st{steepness}_n{noise}')
Example #4
    def create_dataset(self, number_of_games):
        #state_dict = {-1: State(self.n_x_n, -1, None), 1: State(self.n_x_n, 1, None)}
        #current_state = state_dict[self.current_player]

        current_player = self.current_player
        for i in range(number_of_games):
            if random() > 0.6:  # ~40% of games start from a random state
                current_state, current_player = self.create_rand_state()
            else:
                current_state = State(self.n_x_n, self.current_player, None)
            while not self.stateman.is_terminal(current_state.grid,
                                                self.current_player * -1):
                mcts_runner = Mcts(self.stateman, current_state,
                                   self.simulations)
                if self.current_player == -1:
                    self.train_x.append(self.mirror_board(current_state.grid))
                    #self.train_x[-1].append(1)
                else:
                    self.train_x.append(current_state.grid.copy())
                    #self.train_x[-1].append()

                new_state, train_y = mcts_runner.run()
                if self.current_player == -1:
                    self.train_y.append(self.mirror_board_natural(train_y))
                else:
                    self.train_y.append(train_y)

                self.current_player *= -1
                current_state = new_state

            current_player *= -1  # alternate which player starts the next game
            #current_state = state_dict[current_player]
            self.current_player = current_player
            if i % 10 == 0:
                print("Number of games finished: " + str(i))

        with open("hex_dataset_x_" + str(self.size) + ".csv", "a",
                  newline="") as file:
            writer = csv.writer(file)
            writer.writerows(self.train_x)

        with open("hex_dataset_y_" + str(self.size) + ".csv", "a",
                  newline="") as file:
            writer = csv.writer(file)
            writer.writerows(self.train_y)

        model = hex_neural_network(self.size)
        model.create_model()
        model.load_dataset_from_csv()
        model.preprocessing()
        model.train()
        model.save_model()
Example #5
def run_trials():
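    # relies on module-level env, trials, run_action, Node, Metric, and Mcts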
    metrics_mcts = []

    for i in range(trials):
        env.reset()
        m = Metric('step', 'score')
        root = Node(0, 10)
        mcts = Mcts(run_action, root)

        done = False
        while not done:
            done = mcts.step()

        for j, r in enumerate(root.results):
            m.add_record(j, r)

        metrics_mcts.append(m)
        print('Score by MCTS:', sum(root.results))
Example #6
class AIRunner:
    def __init__(self):
        self.model = RenjuModel(modelConfig())
        self.model.load('./currentModel')
        self.config = modelConfig()
        self.restart_game()

    def restart_game(self, otherPlayerIndex=-1):
        self.game = game_state(self.config.common.game_board_size)
        self.mcts = Mcts(modelConfig(), -otherPlayerIndex, self.model)
        self.aiplayer = -otherPlayerIndex

    def get_status(self):
        board = np.array(self.game.board)
        return {
            'board': board.tolist(),
            'next': self.game.playerSide,
            'finished': self.game.finished,
            'winner': self.game.winner,
            'debug_board': self.game.print_beautiful()
        }

    def play(self, x, y):
        if self.game.playerSide != -self.aiplayer:
            return False
        if not self.game.play(x, y):
            return False
        self.mcts.move_to_child(x, y)
        return True

    def aiplay(self):
        if self.game.playerSide != self.aiplayer:
            return False, Hand(0, 0)
        move, _ = self.mcts.search_move(autoMoveIntoChild=False)
        if not self.game.play(move.x, move.y):
            return False, Hand(0, 0)
        self.mcts.move_to_child(move.x, move.y)
        return True, move
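
A hypothetical driver for this class, using only the methods defined above and assuming the game opens on the AI's side (the coordinates are placeholders):

runner = AIRunner()
ok, move = runner.aiplay()      # returns (False, Hand(0, 0)) when it is not the AI's turn
if ok:
    print(runner.get_status()['debug_board'])
runner.play(7, 7)               # human reply at (7, 7); False if out of turn or illegal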
Example #7
    def _init(self, unit):
        # self.min / self.max may be either callables taking the unit or
        # plain numbers; fall back to the raw value when the call fails
        try:
            lower = self.min(unit)
        except TypeError:
            lower = self.min

        try:
            upper = self.max(unit)
        except TypeError:
            upper = self.max

        if self.integer:
            interval = IntNode(lower, upper)
        else:
            interval = FloatNode(lower, upper)

        mc = Mcts(interval)
        setattr(unit, self.mcts_storage_name, mc)
        self._decide(unit)
Example #8
    def train_neural_network(self, number_of_games):
        for i in range(number_of_games):
            # start each game from a fresh root state (mirrors create_dataset;
            # without this, only the first game would generate any moves)
            self.current_state = State(self.n_x_n, self.current_player, None)
            while not self.stateman.is_terminal(self.current_state.grid,
                                                self.current_player * -1):
                mcts_runner = Mcts(self.stateman, self.current_state,
                                   self.simulations)
                self.train_x.append(self.current_state.grid)

                new_state, train_y = mcts_runner.run()
                self.train_y.append(train_y)

                self.current_player *= -1
                self.current_state = new_state

        for i in range(len(self.train_x)):
            print(str(self.train_x[i]) + " = " + str(self.train_y[i]))

        model = hex_neural_network(np.array(self.train_x),
                                   np.array(self.train_y), len(self.n_x_n))
        model.train()

        if self.plotter:
            tree_plot(mcts_runner.current_state)
Example #9
from mcts import Mcts
from nim.nim import Nim

print("Hello in Nim")
piles, objects = input(
    "Set game settings (`number of piles` `number of objects`): ").split()

game = Nim(int(piles), int(objects))
tree = Mcts(game)
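# run(n) appears to perform n MCTS iterations; start with a single warm-up pass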
tree.run(1)

while not game.done:
    print(game.piles)
    # move = input("Your move (`pile` `objects`): ").split()
    # action = tuple(int(x) for x in move)
    tree.run(1200)
    action = tree.predict()
    print('CPU 0 move: %s' % str(action))
    game.act(action)
    tree.move_root(action)

    if game.done:
        print("You won!")
        exit()

    print(game.piles)

    tree.run(1200)
    action = tree.predict()
    game.act(action)
    tree.move_root(action)
Example #10
        neural_net_.load_state_dict(torch.load(last_path))
        iters_ = 0
    else:
        print(
            "Loading model failed: no saved model found, reinitializing neural network"
        )
        iters_ = 0
else:
    iters_ = 0
WebAgent = env.web_agent
print("Learning...")

for _ in tqdm(range(n_iters)):
    iters_ += 1
    memory_ = NNMemoryAnyState(neural_net_, env)
    mcts = Mcts(n_mcts, env, max_depth=100)

    if n_pools > 0:

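        # distribute self-play episodes across Ray workers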
        @ray.remote
        def do_episode_(i):
            return mcts.self_play(memory_, i)

        exps = ray.get([do_episode_.remote(i) for i in range(n_eps)])
    else:
        exps = [mcts.self_play(memory_, i) for i in range(n_eps)]
    exps_arrays = [
        np.concatenate([ex[i] for ex in exps], axis=0) for i in range(4)
    ]
    neural_net_.train_(*exps_arrays)
Example #11
class Game:
    def __init__(self, client: berserk.Client, game_id, player_id, explore_weight=1.0):
        self.client = client

        self.game_id = game_id
        self.player_id = player_id
        self.explore_weight = explore_weight

        self.stream = client.bots.stream_game_state(game_id)

        game_info = next(self.stream)
        self.initial_fen = game_info["initialFen"]
        if self.initial_fen == "startpos":
            self.initial_fen = STARTING_FEN

        print("Initial FEN: ", self.initial_fen)

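        # explore_weight is presumably the UCT exploration constant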
        self.tree = Mcts(explore_weight=self.explore_weight)
        self.node = Node(fen=self.initial_fen)
        self.turn_speed_seconds = 10  # TODO

        self.my_turn = game_info["white"]["id"] == self.player_id
        if self.my_turn:
            self.make_move()

        # self.turn_speed = self.initial_state["clock"]["increment"]

    def run(self):
        for event in self.stream:
            print("Got event: ", event)
            if event["type"] == "gameState":
                self.handle_state_change(event)
            elif event["type"] == "chatLine":
                self.handle_chat_line(event)

    def make_move(self):
        start = time.time()
        print("Thinking...")
        think_count = 0
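        # keep rolling out until the per-turn time budget is exhausted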
        while time.time() - start < self.turn_speed_seconds:
            self.tree.rollout(self.node)
            think_count += 1
        print(f"Thought of {think_count} moves")

        new_node = self.tree.choose(self.node)
        print(f"Move score: : {self.tree.rewards[new_node]}/{self.tree.visit_count[new_node]}")

        # Make the selected move
        move_str = get_move(self.node, new_node)
        print("Making move", move_str)
        self.client.bots.make_move(self.game_id, move_str)

        self.node = new_node

    def handle_state_change(self, event):
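        # replay the full move list from the event onto a fresh board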
        board = Board(fen=self.initial_fen)
        for move in event["moves"].split():
            board.push(Move.from_uci(move))
        self.node = Node(fen=board.fen())
        self.my_turn = not self.my_turn

        print(f"My turn? {self.my_turn}")

        if self.my_turn:
            self.make_move()

    def handle_chat_line(self, event):
        print(event)
Example #12
from random_player import Random
from minimax import Minimax
from play_game import play_game, play_games
from qlearning import QLearning
from mcts import Mcts
from human import Human
from qneural import QNeural
from abpruning import ABPruning
from board.tictactoe import TicTacToe
from torch.nn import MSELoss
from board.mancala import Mancala

human = Human()
tree = Mcts()
minimax = Minimax()
random = Random()
ab_pruning = ABPruning()

# tree.train(Mancala(), 2000000)
#
# play_games(100, tree, random, Mancala())
# play_games(100, random, tree, Mancala())

# x_learning = QLearning()
# o_learning = QLearning()
#
#
# neural = QNeural(MSELoss())

#neural.load("./neural_checkpoints/checkpoint_1607248678_200000")
# neural.train(1, Mancala(), Random(), 200000)
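
A minimal sketch of how these agents could be matched up, reusing the play_games(n_games, player_1, player_2, board) and Mcts.train(board, iterations) signatures implied by the commented-out calls above:

tree.train(TicTacToe(), 10000)
play_games(100, tree, random, TicTacToe())
play_games(100, random, tree, TicTacToe())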
Example #13
# opening of the opponent table reconstructed from the prompt below;
# the first three agent class names are assumed
possible_opponents = {
    1: RandomAgent,
    2: SafeAgent,
    3: AggressiveAgent,
    4: LazyAgent
}

print("Welcome in Pong")
selected_opponent, = input(
    "Select opponent for MCTS (1 - Random, 2 - Safe, 3 - Aggressive, 4 - Lazy): "
).split()

game = PongGame()
game = PongMonitor(game, ".", force=True)
game.reset()

opponent = possible_opponents[int(selected_opponent)]()
mcts_agent = GreedyAgent()

tree = Mcts(game, simulation_agent=mcts_agent)
# tree = Mcts(game)

count = 0

while not game.done:
    count = count + 1
    start = time()
    tree.run(30, verbose=True)
    stop = time()
    ob = game._get_obs()
    # if ob is not None:
    #     game.ale.saveScreenPNG('images/' + str(count) + '-state.png')
    #     print(count, end=" ")
    #     for i, val in enumerate(ob):
    #         print(val, end=" ")
Example #14
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    filename = (f"./logss/{playout['method']}-no-skip-1.41/"
                f"pong-{playout['method']}-{playout['runs']}"
                f"-against-{opponent_names[playout['agent']]}_{timestamp}")
    game = PongMonitor(game, filename, force=False)
    game.reset()

    pong_logger = PDLogger(filename)
    opponent = possible_opponents[agent]()
    mcts_agent = GreedyAgent()

    tree = None
    if playout['method'] == 'greedy':
        tree = Mcts(game,
                    simulation_agent=mcts_agent,
                    logger=pong_logger,
                    skip_actions=playout['skip_actions'],
                    exploration_parameter=playout['exploration_parameter'])
    elif playout['method'] == 'random':
        tree = Mcts(game,
                    logger=pong_logger,
                    skip_actions=playout['skip_actions'],
                    exploration_parameter=playout['exploration_parameter'])

    count = 0

    while not game.done:
        count = count + 1
        start = time()
        tree.run(playout['runs'], verbose=True)