def mcts_fn(config, q_mcts_to_train, q_mcts_to_eval, p_eval_to_mcts,
            process_id):
    set_config(config)
    print('Starting MCTS process %s' % process_id)
    sys.stdout.flush()
    np.random.seed(process_id)

    p_em_recv, p_em_send = p_eval_to_mcts
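    # this worker only receives evaluations over the pipe, so close the unused send end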
    p_em_send.close()

    def eval_state(state):
        # randomly rotate and flip before evaluating
        k = np.random.randint(4)
        flip = np.random.randint(2)
        if k != 0:
            state = np.rot90(state, k=k, axes=(-2, -1))
        if flip:
            state = np.flip(state, axis=-1)
        q_mcts_to_eval.put((process_id, state.tolist()))
        v, p = p_em_recv.recv()
        p = np.array(p, dtype=np.float32)
        if flip:
            p = np.flip(p, axis=-1)
        if k != 0:
            p = np.rot90(p, k=-k, axes=(-2, -1))
        return v, p

    # (curr_player, opponent, last_opponent_move, is_curr_player_first)
    start_state = get_start_state()
    mcts = MCTS(start_state, eval_state)
    while True:
        q_mcts_to_train.put(tuple(x.tolist() for x in mcts.run()))
Example #2
    def __init__(self, player, nb_rows, nb_cols, timelimit):
        """Create Dots and Boxes agent.

        :param player: Player number, 1 or 2
        :param nb_rows: Rows in grid
        :param nb_cols: Columns in grid
        :param timelimit: Maximum time allowed to send a next action.
        """
        self.moves_made = []

        self.player = player
        self.timelimit = timelimit
        self.ended = False
        self.nb_rows = nb_rows
        self.nb_cols = nb_cols
        rows = []
        for ri in range(nb_rows + 1):
            columns = []
            for ci in range(nb_cols + 1):
                columns.append({"v": 0, "h": 0})
            rows.append(columns)
        self.cells = rows
        free_lines = []
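        # collect every undrawn line: 'v' edges exist for all but the last row, 'h' edges for all but the last column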
        for ri in range(len(self.cells)):
            row = self.cells[ri]
            for ci in range(len(row)):
                cell = row[ci]
                if ri < (len(self.cells) - 1) and cell["v"] == 0:
                    free_lines.append((ri, ci, "v"))
                if ci < (len(row) - 1) and cell["h"] == 0:
                    free_lines.append((ri, ci, "h"))
        self.mcts = MCTS(self.cells, free_lines, player, timelimit)
Example #3
    def play_episode(self):
        obs = self.env.reset()
        env_state = self.env.get_state()

        done = False
        t = 0
        total_reward = 0.0

        mcts = MCTS(self.config)

        root_node = Node(state=env_state,
                         done=False,
                         obs=obs,
                         reward=0,
                         action=None,
                         parent=RootParentNode(env=self.env_creator()),
                         mcts=mcts,
                         depth=0)

        while not done:
            t += 1
            # compute action choice
            action, root_node = mcts.compute_action(root_node)
            # remove the old part of the tree that we won't use anymore
            root_node.parent = RootParentNode(env=self.env_creator())

            # take action
            obs, reward, done, info = self.env.step(action)
            if self.config["render"]:
                self.env.render()
            total_reward += reward
        self.env.close()
        return t, total_reward
Example #4
def execute_episode(network, replay_buffer, experiment):
    examples = []
    board = Game(player_turn=1)
    mcts = MCTS(board.clone(), network)
    temp = 1.0
    i = 0
    while not board.over():
        i += 1
        # drop the temperature after the opening moves so later play is near-greedy
        if i >= experiment.get_parameter('temp_decrese_moves'):
            temp = 10e-3
        # perform mcts search
        for _ in range(experiment.get_parameter('mcts_rollouts')):
            mcts.search(mcts.root, board.clone())

        # choose the action
        N_total = np.sum(np.array(list(mcts.root.N.values()))**(1 / temp))
        pi = np.zeros(6)
        for a in mcts.root.actions:
            pi[a] = mcts.root.N[a]**(1 / temp) / N_total
        action = np.random.choice(np.arange(6), p=pi)
        # add the move to the replay buffer
        replay_buffer.add(board.board(), action, pi, mcts.root.v_mult,
                          board.valid_moves())
        print("Board {}, action {}, MCTS probabilities {}".format(
            board.board(), action, pi))
        board.move(action)
        if board.over():
            replay_buffer.finish_episode(board.winner())
            return board.winner()
        mcts.root = mcts.root.children[action]
Example #5
class Policy:
    def __init__(self, model, name, player, cfg, timeout):
        self.cfg = cfg
        self.mct = MCTS(None, player, None, self.cfg['training']['c'])
        self.name = name
        self.player = player
        self.random_move_prob = 1
        self.timeout = timeout

    def get_move(self, game):

        s_init = game.to_string_representation()
        root = Node(None, None, self.player, s_init)
        self.mct.root = root
        self.mct.root_state = deepcopy(game)

        start_time = time.time()

        while time.time() - start_time < self.timeout:
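            # one MCTS iteration: select a leaf, roll out from it, then backpropagate the outcome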
            leaf_node, leaf_state, path, actions = self.mct.select()
            turn = leaf_state.player
            outcome = self.mct.rollout(leaf_state, path)
            # print(f'{outcome} {outcome == self.mct.player}')
            self.mct.backprop(leaf_node, turn, outcome, path, actions)

        dist = self.mct.get_action_distribution()
        return game.LEGAL_MOVES[dist.index(max(dist))]
Example #6
    def initialize(self, gameData, player):
        try:
            # Initializing the command center, the simulator and some other things
            self.inputKey = settings.JVM.struct.Key()
            self.frameData = settings.JVM.struct.FrameData()
            self.cc = settings.JVM.aiinterface.CommandCenter()

            self.player = player
            self.gameData = gameData
            self.score = 0
            self.game_num += 1
            print(f"Starting game number {self.game_num}.")

            self.input_builder = InputBuilder(self.gameData, self.player)
            os.environ['CUDA_VISIBLE_DEVICES'] = ''

            # Make graph default for session + make a random prediction because the initial prediction can take longer
            with self._graph.as_default():
                self.nn.predict(self.input_builder.build_random())

            self.mcts = MCTS(self.gameData.getSimulator(), self.nn,
                             self.input_builder)
            self.action_thread = None
            self.action = None
            self.training_examples = []

        except BaseException as e:
            print(f"INIT ERROR: {e.args}", flush=True)
            raise e

        return 0
Example #7
def play_game():
    global data
    tree = MCTS()
    board = new_tic_tac_toe_board()
    while True:
        for _ in range(100):
            tree.do_rollout(board)
        new_board = tree.choose(board)
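        # when the chosen successor was not flagged as random, record (state, turn, action) in the global data dict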
        if (not new_board.rand):
            state = list(
                map(lambda x: '0' if x is None else '1'
                    if x else '-1', board.tup))
            data['State'].append(state)
            data['Turn'].append('1' if board.turn ==
                                True else '-1' if board.turn == False else '0')
            for i in range(9):
                if board.tup[i] != new_board.tup[i]:
                    data['Action'].append(i)
        board = new_board
        print(board.to_pretty_string())
        if board.winner is not None:
            print('X wins' if board.winner else 'O wins')
            print()
        if board.terminal:
            break
Example #8
def mcts_search_worker(nn_thread_edge_queue, nn, is_cuda, max_game_length,
                       peace, simulations_per_play, debug, epoch,
                       new_time_steps):
    mcts = MCTS(nn_thread_edge_queue, nn, is_cuda, max_game_length, peace,
                simulations_per_play, debug)
    mcts.play_until_terminal()
    new_time_steps += mcts.time_steps
Example #9
    def __init__(self, game, nnet, args):
        self.game = game
        self.nnet = nnet
        self.args = args
        self.mcts = MCTS(self.game, self.nnet, self.args)
        self.train_history = []
        self.skip_first = False
Example #10
    def learn(self):
        for i in range(self.config.num_iters):
            self_play = SelfPlay(self.game, self.model)
            examples = self_play.generate_play_data()
            for _ in range(self.config.num_episodes):
                examples += self_play.generate_play_data()
            examples = self.examples_to_array(examples)
            examples = self.shuffle_examples(examples)

            # Step 1. Keep a copy of the current model
            self.model.save_checkpoint(filename='temp.pth.tar')
            self.prev_model.load_checkpoint(filename='temp.pth.tar')

            # Step 2. Training the model
            prev_mcts = MCTS(self.game, self.prev_model, self.config.c_puct, self.config.num_sims)
            self.model.train(examples)
            new_mcts = MCTS(self.game, self.model, self.config.c_puct, self.config.num_sims)

            # Step 3. Evaluate the model
            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(self.game, new_mcts, prev_mcts)
            # Player 1 is the optimized player
            player1_win, player2_win, draw = arena.play_matches(self.config.arena_games)
            print('NEW MODEL/PREV MODEL WINS : %d / %d ; DRAWS : %d' % (player1_win, player2_win, draw))

            if ((player1_win * 1.0) / self.config.arena_games) > self.config.arena_threshold:
                print('ACCEPTING NEW MODEL')
                self.model.save_checkpoint(filename=self.getCheckpointFile(i))
                self.model.save_checkpoint(filename='best.pth.tar')
            else:
                print('REJECTING NEW MODEL')
                self.model.load_checkpoint(filename='temp.pth.tar')
Example #12
class Game:
    def __init__(self, ai, first=1):
        self.mcts = MCTS(ai.estimator, maxiter=ai.mcts_iters, first=first)

    @property
    def over(self):
        return self.mcts.state.over

    @property
    def best_action(self):
        return State.domain[np.argmax(self.mcts.search())]

    @property
    def winner(self):
        return State.player_codes[self.mcts.state.winner]

    def apply(self, action):
        if action not in self.mcts.state.actions:
            raise IllegalActionException(f"tried illegal action {action}")
        self.mcts.apply(action)
        return self

    @staticmethod
    def coord_to_action(coord):
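        # convert a coordinate like 'B3' into a flat, row-major action index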
        m = re.search(r"([a-zA-Z])(\d)$", coord)
        if m:
            return State.raw_shape[0] * (ord(m[1].upper()) - ord('A')) \
                    + int(m[2]) - 1

    @staticmethod
    def action_to_coord(action):
        return chr(action // 9 + ord('A')) + str(action % 9 + 1)

    def __repr__(self):
        return str(self.mcts.state)
Example #13
class MCTSAgent():
    def __init__(self,
                 policy_module,
                 rollout_module,
                 playout_depth=10,
                 n_playout=100):

        if policy_module is None and rollout_module is None:
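            # no policy/rollout modules supplied: fall back to purely random policy and value functions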

            self.policy_fn = self.rollout_policy_fn = random_policy_fn
            self.value_fn = random_value_fn

        else:

            self.value_fn = policy_module.value_fn
            self.policy_fn = policy_module.policy_fn
            self.rollout_policy_fn = rollout_module.policy_fn

        self.mcts = MCTS(value_fn=self.value_fn,
                         policy_fn=self.policy_fn,
                         rollout_policy_fn=self.rollout_policy_fn,
                         playout_depth=playout_depth,
                         n_playout=n_playout)

    def step(self, timestep, env):

        move = self.mcts.get_move(timestep, env)
        self.mcts.update_with_move(move)
        return move
Example #14
def fight(net1, net2):
    numGame = 10
    win_net1 = 0
    win_net2 = 0
    mcts = MCTS()

    for color in [BLACK, WHITE]:
        for e in range(int(numGame / 2)):
            print ('[FIGHTING] game number ', e)
            board = game.GameBoard()
            board.play(randint(0, 360))  # start from a random position
            while not board.gameEnd():

                if board.player_turn == color:
                    moves = mcts.pi(board, net1)
                else:
                    moves = mcts.pi(board, net2)
                a = moves.index(max(moves))
                board.play(a)
            print ('end, winner = ', "White" if board.reward == -1 else "Black")
            board.display_board()

            if board.player_turn == color:  # the new network lost
                win_net2 += 1
            else:
                win_net1 += 1

    print('matchup result: ', win_net1, ' / ', win_net2)

    return win_net1 / numGame
Example #15
    def maximum_similarity_model(model,
                                 clusters,
                                 scaler,
                                 MAX_CLUSTERS,
                                 NOISE_PARAM,
                                 similarity_mean,
                                 similarity_std,
                                 env=None):
        sim = similarity[model]
        node = run_mcts(clusters, similarity, scaler, MAX_CLUSTERS,
                        NOISE_PARAM, similarity_mean, similarity_std,
                        action_count)(idx=0,
                                      cluster=1,
                                      similarity=sim[0],
                                      terminal=False)
        mcts = MCTS(env=env)

        while True:
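            # run 25 rollouts, then greedily choose the next node; stop once a terminal node is reached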
            for i in range(25):
                mcts.do_rollout(node)
            node, score = mcts.choose(node)
            if node.terminal:
                break
        idxs = np.where((similarity == node.similarity))
        idxs = np.where((clusters[idxs[0]] == node.cluster))[0]
        state_selected = idxs[0]
        return state_selected, score
Example #16
class Student():
    def __init__(self, model_path=None, insight=False, time=1, learning=False):
        self.brain = Brain(model_path)
        self.mcts = MCTS(self.brain, time=time, learning=learning)
        self.memory = Memory(PICK_SIZE)
        self.insight = insight

    def move(self, gomoku, root=None):
        root = self.mcts.search(gomoku, root)
        probabilities = self.mcts.rank_children(root)
        best_child = self.mcts.choose_best_child(root)
        return best_child, probabilities

    def save_knowledge(self, classes):
        self.memory.add(classes)

    def learn(self, save=False, name=None):
        states = []
        values = []
        probabilities = []
        for i in range(NO_OF_PICKS):
            samples = self.memory.sample()
            states.extend([a[0] for a in samples])
            values.extend([a[1] for a in samples])
            probabilities.extend([a[2] for a in samples])
        self.brain.learn(states, values, probabilities, save, name)
        self.memory.clear()
Example #17
def main(args):
    if args.player1 == "human":
        agent1 = Human(1, surface)
    elif args.player1 == "minimax":
        agent1 = Minimax(1, args.minimax_depth[0], args.variant)
    elif args.player1 == "mcts":
        agent1 = MCTS(1, args.mcts_depth[0], args.mcts_rollouts[0],
                      args.variant, args.heuristic_rollouts[0],
                      args.input_file[0] if args.input_file else None,
                      args.output_file[0] if args.output_file else None,
                      args.ucb_const[0])

    if args.player2 == "human":
        agent2 = Human(-1, surface)
    elif args.player2 == "minimax":
        agent2 = Minimax(-1, args.minimax_depth[1], args.variant)
    elif args.player2 == "mcts":
        agent2 = MCTS(-1, args.mcts_depth[1], args.mcts_rollouts[1],
                      args.variant, args.heuristic_rollouts[1],
                      args.input_file[1] if len(args.input_file) == 2 else None,
                      args.output_file[1] if len(args.output_file) == 2 else None,
                      args.ucb_const[1])

    for i in range(args.num_games):
        play_game(agent1, agent2, surface, args.variant, args.wait_between)
        if type(agent1) == MCTS:
            agent1.reset(1)
        if type(agent2) == MCTS:
            agent2.reset(-1)
        if args.alternate_sides:
            agent1.switch_sides()
            agent2.switch_sides()
            temp = agent1
            agent1 = agent2
            agent2 = temp
        if type(agent1) == MCTS:
            agent1.store_root()
        if type(agent2) == MCTS:
            agent2.store_root()
Example #18
    def test_select_expand(self):
        env = gym.make('MiniGrid-Empty-5x5-v0')
        mcts_obj = MCTS(env)
        self.assertEqual(mcts_obj.root_node.children, [])
        path = mcts_obj.select_expand()
        self.assertEqual(path, [0])
        self.assertEqual(len(mcts_obj.root_node.children), 7)
Example #19
    def _AI_player(self):
        '''the interface for AI
        Parameters required and updated: board status, which side to play 
        Return: the next gomoku piece coordinate (x, y)

        Gomoku Board status: 0 means no pieces, 1 means black pieces and -1 means white pieces
        '''

        self.human = False

        if self.is_start == False:
            return

        # AI_program

        AI = MCTS()
        AI = Alpha(model_file=self.model_file, use_gpu=False)
        [x, y] = AI.play(self.row, self.column, self.board)

        self._draw_piece(x, y, self.is_black)
        self.board[x][y] = self._ternary_op(1, -1, self.is_black)

        self.last_x, self.last_y = x, y
        self._gomoku_who_win()

        self.is_black = not self.is_black
        self.l_info.config(
            text=self._ternary_op('黑方行棋', '白方行棋', self.is_black))  # "Black to move" / "White to move"
        self.human = True
Example #20
def play_game(agent0, agent1, mcts_iter):
    board = Board()

    steps = 0
    # agents = (agent0, agent1)
    agents = ((agent0, MCTS(agent0, n_iter=mcts_iter)),
              (agent1, MCTS(agent1, n_iter=mcts_iter)))
    curr_agent_idx = random.choice([0, 1])
    samples_buffer = []
    while True:
        steps += 1

        # MCTS
        agent, mcts = agents[curr_agent_idx]
        try:
            root_node, mcts_p, action_p, value = mcts.search(
                board, curr_agent_idx)
            # root_node, mcts_p, action_p, value = mcts(board, agent, curr_agent_idx, n_iter=mcts_iter)
        except TerminalStateException:
            break

        state, valid_positions, valid_positions_mask = root_node.state
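        # sample from the MCTS visit distribution for the first 20 moves (exploration), then play greedily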
        if steps <= 20:
            action_idx = np.random.choice(len(mcts_p), p=mcts_p)
        else:
            action_idx = np.argmax(mcts_p)

        # /MCTS

        # No mcts
        # agent = agents[curr_agent_idx]
        # state, valid_positions, valid_positions_mask = get_state(board, curr_agent_idx)

        # if len(valid_positions) == 0:
        #     break

        # action_p, value = agent(tf.convert_to_tensor([state], dtype=tf.float32))
        # action_p = action_p[0].numpy() * valid_positions_mask.reshape((-1,))
        # value = value[0].numpy()
        # action_idx = np.random.choice(len(action_p), p=action_p / np.sum(action_p))
        # /No mcts

        if curr_agent_idx == 0:
            samples_buffer.append(
                [state, action_p[action_idx], action_idx, value])

        position_key = (int(action_idx / board.size),
                        int(action_idx % board.size))
        board.apply_position(curr_agent_idx, valid_positions[position_key])

        curr_agent_idx = 1 - curr_agent_idx

    reward = 0
    player0_score, player1_score = board.scores()
    if player0_score < player1_score:
        reward = -1
    elif player1_score < player0_score:
        reward = 1

    return samples_buffer, reward, steps
Example #21
    def mcts_refresh_game(self):
        with torch.no_grad():
            self.nn.eval()
            self.time_steps = []
            for i in range(self.game_size):
                nn_thread_edge_queue = queue.Queue(maxsize=self.max_queue_size)
                # def gpu_thread_worker(nn, queue, eval_batch_size, is_cuda):
                gpu_thread = threading.Thread(
                    target=gpu_thread_worker,
                    args=(self.nn, nn_thread_edge_queue, self.eval_batch_size,
                          self.is_cuda))
                gpu_thread.start()
                mcts = MCTS(nn_thread_edge_queue, self.nn, self.is_cuda,
                            self.max_game_length, self.simulations_per_play,
                            self.debug)
                mcts.play_until_terminal()
                nn_thread_edge_queue.put(None)
                # print("Terminal sentinel is put on queue")
                nn_thread_edge_queue.join()
                if self.debug:
                    print("Queue has joined")
                gpu_thread.join()
                if self.debug:
                    print("Thread has joined")
                self.time_steps += mcts.time_steps
                print("Successful generation of one game")
                print("Queue empty:", nn_thread_edge_queue.empty())
Example #22
    def run_batch(self):
        """
    Runs G games of the specified type (Nim or Ledge). All parameters are fixed
    for all runs. Summarizes the results of the batch run in a print-sentence. 
    Creates a new instance of the game and for each move, asks the agent for an
    action. This action is applied and chancges the state of the board. When a
    final state is reached, the results are given to the agent for backpropagation
    and a new game instance is made.
    Returns a list of round winners
    """
        agent = MCTS(exploration_rate=self.c)
        win_stats = []

        game = self.create_game()
        tree = Tree(game)

        for i in range(self.G):
            state = tree.root

            while (not game.is_terminal_state()):
                best_child = agent.uct_search(tree, state, self.M)
                game.move(best_child.move)
                state = best_child

            win_stats.append(game.get_active_player())
            game = self.create_game()
            tree = Tree(game)

        self.summarize_batch(win_stats)
        return win_stats
Example #23
def play_game():
    tree = MCTS()
    board = new_tic_tac_toe_board()
    print(board.to_pretty_string())
    while True:
        row_col = input("Enter row.col: ")
        row, col = map(int, row_col.split("."))
        index = 3 * (row - 1) + (col - 1)
        if board.tup[index] is not None:
            print("Invalid move")
            continue
        board = board.make_move(index)
        print(board.to_pretty_string())
        if board.leaf:
            if board.winner:
                print("You won!")
            elif board.winner is None:
                print("It's a draw!")
            else:
                print("You lost!")
            break

        # MCTS
        for _ in range(150):
            tree.search(board)
        board = tree.chooseBestNode(board)
        print(board.to_pretty_string())
        if board.leaf:
            if board.winner:
                print("You won!")
            elif board.winner is None:
                print("It's a draw!")
            else:
                print("You lost!")
            break
Example #24
    def getMove(self, game):
        evaluator = SimpleEvaluator()
        mcts = MCTS(evaluator)

        mcts.run(game)
        # mcts.dump()
        return mcts.getBestMove()
Example #25
class Gomoku4(object):
    """
    For each move, run `n_simualtions_per_move` playouts,
    then select the move with the best win rate.
    Playouts can be either random or rule-based (i.e., using pre-defined patterns).
    """
    def __init__(self, n_simualtions_per_move=10000, board_size=7, exploration=0.4):
        self.n_simualtions_per_move=n_simualtions_per_move
        self.board_size=board_size
        self.exploration = exploration
        self.parent = None

        self.name="Gomoku4"
        self.version = 4.0
        self.best_move=None

        self.MCTS = MCTS()

    def reset(self):
        self.MCTS = MCTS()

    def update(self, move):
        self.parent = self.MCTS._root 
        self.MCTS.update_with_move(move)

    def get_move(self, board, color_to_play):
        """
        The genmove function called by gtp_connection
        """
        move = self.MCTS.get_move(board, color_to_play, self.n_simualtions_per_move, self.exploration)
        self.update(move)
        return move
Example #26
    def _act(self, obs, action_space):
        state = self._create_sim_state(obs)

        env_state = _EnvState(state, self._character.agent_id, self._sim_env, self._net)

        selected_actions = None
        selected_actions_prs = None
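        # use a high temperature during the early self-play moves for exploration, near-greedy (1e-3) afterwards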
        if self._is_self_play and self._step_count <= self._num_exploration_steps:
            temp = 1.0
        else:
            temp = 1e-3
        searcher = MCTS(env_state, temp=temp, iteration_limit=self._iteration_limit, is_self_play=self._is_self_play)
        for i, (actions, action_prs) in enumerate(searcher.search()):
            if i == self._character.agent_id:
                self._training_states_self += self._get_training_states(i)
                self._action_prs_self.append(action_prs)

                selected_actions = actions
                selected_actions_prs = action_prs
            else:
                self._training_states_other += self._get_training_states(i)
                self._action_prs_other.append(action_prs)

        np.random.seed(int.from_bytes(os.urandom(4), byteorder='little'))
        action = np.random.choice(selected_actions, p=selected_actions_prs)

        return action
Example #27
    def step(self, state):
        """ Bot takes a step by running MCTS and selecting a move

        Args:
            state: current game state

        Returns:
            policy: list of actions and corresponding probabilities
            action: real action that should be performed

        """
        # Defaults to keep the tree during a game
        if self.keep_search_tree:
            action_history = state.history()
            if self.self_play:
                # Update root of the tree with last action
                if action_history:
                    self.mcts.update_root(action_history[-1])
            else:
                # Update root of the tree with last two actions (last opponent and own moves)
                if len(action_history) >= 2:
                    self.mcts.update_root(action_history[-2])
                    self.mcts.update_root(action_history[-1])

        # Create a new MCTS search tree
        else:
            self.mcts = MCTS(self.policy_fn, self.num_distinct_actions,
                             **self.kwargs)

        # Perform the MCTS
        normalized_visit_counts = np.array(self.mcts.search(state))
        legal_actions = state.legal_actions(state.current_player())

        # Remove illegal actions
        normalized_visit_counts_legal_actions = remove_illegal_actions(
            normalized_visit_counts, legal_actions)

        # Action probabilities based on temperature
        action_probabilities = normalized_visit_counts_legal_actions**(
            1. / self.temperature) / sum(normalized_visit_counts_legal_actions
                                         **(1. / self.temperature))

        # Select the action, either probabilistically or simply the best.
        if self.use_random_actions and len(
                state.history()) < self.num_probabilistic_actions:
            action = np.random.choice(legal_actions)
        elif self.use_probabilistic_actions and len(
                state.history()) < self.num_probabilistic_actions:
            action = np.random.choice(len(action_probabilities),
                                      p=action_probabilities)
        else:
            action = np.argmax(action_probabilities)

        # Set the training targets for the policy
        policy = []
        for act in legal_actions:
            policy.append((act, normalized_visit_counts_legal_actions[act]))

        return policy, action
Example #28
def test_run_sim():
    mcts = MCTS()
    game = ConnectFour(4, 4, True, True, "testMCTS")
    root = mcts.run_sim(game)
    print("tree after sim")
    mcts.print_tree(root)
    shutil.rmtree("testMCTS")
Example #29
class MCTSPlayer(object):
    def __init__(self, pvnet_fn, play_style=0):
        self.mcts = None
        self.pvnet_fn = pvnet_fn
        self.play_style = play_style
        self.rollout_times = config.mcts_player_config['rollout_times']
        self.dirichlet_eps = config.mcts_player_config['dirichlet_eps']

    def init(self):
        self.mcts = None

    def start_tree_search(self):
        for i in range(self.rollout_times):
            self.mcts.tree_search()

    def get_move_policy_value(self, board):
        if self.mcts is None:
            self.mcts = MCTS(board, self.pvnet_fn)
        else:
            self.mcts.update_move(board)

        self.start_tree_search()

        policy, value = self.mcts.get_policy_value()
        #policy[1][policy[1] < 0.01] = 0.0
        sum = policy[1].sum()
        #if sum == 0.0:
        #	print("sum err:", sum)
        #if sum != 1.0:
        #	policy[1] /= policy[1].sum()
        #print(policy)

        # lose
        if sum == 0.0:
            move = np.random.choice(policy[0])
        # win
        elif sum > 1.5:
            move = policy[0][np.random.choice(np.nonzero(policy[1])[0])]
        elif self.play_style == 0:
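            # self-play style: blend Dirichlet noise into the MCTS policy for extra exploration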
            eps = self.dirichlet_eps
            dirichlet = np.random.dirichlet(0.03 * np.ones(len(policy[0])))
            p = (1.0 - eps) * policy[1] + eps * dirichlet
            p /= p.sum()
            move = np.random.choice(policy[0], p=p)
            #print("real policy", p, "move", move)
        elif self.play_style == 1:
            move = np.random.choice(policy[0], p=policy[1])
        elif self.play_style == 2:
            p = copy.deepcopy(policy[1])
            p[p < 0.01] = 0.0

            p /= p.sum()
            move = np.random.choice(policy[0], p=p)
            #move = np.random.choice(policy[0], p = policy[1])
        else:
            print("mode 3")
            move = policy[0][np.argmax(policy[1])]
        return move, policy, value
Example #30
class Nogo():
    def __init__(self,
                 num_sim,
                 sim_rule,
                 move_filter,
                 in_tree_knowledge,
                 size=7,
                 limit=100,
                 exploration=0.4):
        """
        Player that selects a move based on MCTS from the set of legal moves
        """
        self.name = "Nogo 4"
        self.version = 0.22
        self.komi = 6.5
        self.MCTS = MCTS()
        self.num_simulation = 2000
        self.limit = limit
        self.exploration = exploration
        self.simulation_policy = 'random'
        self.use_pattern = True
        self.in_tree_knowledge = in_tree_knowledge
        self.parent = None

    def reset(self):
        self.MCTS = MCTS()

    def update(self, move):
        self.parent = self.MCTS._root
        self.MCTS.update_with_move(move)

    def get_move(self, board, toplay):
        move = self.MCTS.get_move(
            board,
            toplay,
            komi=self.komi,
            limit=self.limit,
            #check_selfatari=self.check_selfatari,
            use_pattern=self.use_pattern,
            num_simulation=self.num_simulation,
            exploration=self.exploration,
            simulation_policy=self.simulation_policy,
            in_tree_knowledge=self.in_tree_knowledge)
        self.update(move)
        return move

    def get_node_depth(self, root):
        MAX_DEPTH = 100
        nodesAtDepth = [0] * MAX_DEPTH
        count_at_depth(root, 0, nodesAtDepth)
        prev_nodes = 1
        return nodesAtDepth

    def get_properties(self):
        return dict(
            version=self.version,
            name=self.__class__.__name__,
        )