def make_training_move(board_state, side):
    """Choose a move for the player `side` during self-play training and
    record the (state, move) pair into the mini-batch buffers.

    board_state: current flat board representation.
    side: +1 or -1, the player to move; its sign tags whose state is stored.
    Returns: the chosen move as a tuple via game_spec.flat_move_to_tuple.
    """
    if cnn_on:
        # We must have the first 3x3 board as first 9 entries of the list,
        # second 3x3 board as next 9 entries etc.
        # This is required for the CNN. The CNN takes the first 9 entries
        # and forms a 3x3 board etc.
        """If the 10 split 3x3 boards are desired, use create_3x3_board_states(board_state) here"""
        np_board_state = create_3x3_board_states(board_state)
    else:
        np_board_state = np.array(board_state)
        # NOTE(review): entries > 1 are zeroed — presumably markers (e.g. for
        # decided sub-boards) that the plain network should not see; confirm.
        np_board_state[np_board_state > 1] = 0
    mini_batch_board_states.append(
        np_board_state * side
    )  # append all states are used in the minibatch (+ and - determine which player's state it was)
    rand_numb = random.uniform(0., 1.)
    if rand_numb < eps:
        # Epsilon-greedy exploration: play a random valid move with probability eps.
        move = get_random_network_move(board_state, game_spec)
    elif deterministic:
        move = get_deterministic_network_move(session, input_layer, output_layer,
                                              board_state, side,
                                              valid_only=True, game_spec=game_spec,
                                              cnn_on=cnn_on)
    else:
        if mcts:
            _, move = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                              session, input_layer, output_layer,
                                              True, cnn_on, True)
        else:
            move = get_stochastic_network_move(session, input_layer, output_layer,
                                               board_state, side,
                                               valid_only=True, game_spec=game_spec,
                                               cnn_on=cnn_on)
    move_for_game = np.asarray(
        move
    )  # The move returned to the game is in a different configuration than the CNN learn move
    if cnn_on:
        # Since the mini batch states is saved the same way it should enter the
        # neural net (the adapted board state), the same should happen for the
        # mini batch moves
        move = create_3x3_board_states(np.reshape(
            move, [9, 9]))  # The function requires a 9x9 array
        mini_batch_moves.append(move[0:81])
    else:
        mini_batch_moves.append(move)
    return game_spec.flat_move_to_tuple(move_for_game.argmax())
def make_training_move(board_state, side):
    """Choose a move for the player `side` during self-play training and
    record the (state, move) pair into the mini-batch buffers.

    NOTE(review): near-duplicate of the make_training_move variant above, but
    this one does NOT zero out board values > 1 in the non-CNN branch —
    confirm whether that difference is intentional.
    """
    if cnn_on:
        # Re-order the flat board so each 3x3 sub-board is contiguous, as the
        # CNN input layer expects.
        np_board_state = create_3x3_board_states(board_state)
    else:
        np_board_state = np.array(board_state)
    # Sign of `side` marks which player's state this was in the mini-batch.
    mini_batch_board_states.append(np_board_state * side)
    rand_numb = random.uniform(0., 1.)
    if rand_numb < eps:
        # Epsilon-greedy exploration: play a random valid move with probability eps.
        move = get_random_network_move(board_state, game_spec)
    elif deterministic:
        move = get_deterministic_network_move(session, input_layer, output_layer,
                                              board_state, side,
                                              valid_only=True, game_spec=game_spec,
                                              cnn_on=cnn_on)
    else:
        if mcts:
            _, move = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                              session, input_layer, output_layer,
                                              True, cnn_on, True)
        else:
            move = get_stochastic_network_move(session, input_layer, output_layer,
                                               board_state, side,
                                               valid_only=True, game_spec=game_spec,
                                               cnn_on=cnn_on)
    move_for_game = np.asarray(
        move
    )  # The move returned to the game is in a different configuration than the CNN learn move
    if cnn_on:
        # Since the mini batch states is saved the same way it should enter the
        # neural net (the adapted board state), the same should happen for the
        # mini batch moves
        move = create_3x3_board_states(np.reshape(
            move, [9, 9]))  # The function requires a 9x9 array
        mini_batch_moves.append(move[0:81])
    else:
        mini_batch_moves.append(move)
    return game_spec.flat_move_to_tuple(move_for_game.argmax())
def player_func(board_state, side):
    """Select a move for evaluation play.

    Uses Monte-Carlo tree search when `mcts` is enabled, otherwise the
    deterministic network policy. Returns the move as a board tuple.
    """
    if mcts:
        _, chosen = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                            session, input_layer, output_layer,
                                            True, cnn_on, True)
    else:
        chosen = get_deterministic_network_move(session, input_layer, output_layer,
                                                board_state, side,
                                                valid_only=True, game_spec=game_spec)
    # MCTS may hand back a plain sequence; argmax needs an ndarray, and the
    # move layout differs from the CNN's learning layout.
    flat_index = np.asarray(chosen).argmax()
    return game_spec.flat_move_to_tuple(flat_index)
def make_move_historical(net, board_state, side):
    """Choose a move using a historical network snapshot.

    net: pair (input_layer, output_layer) of the historical network.
    board_state: current flat board representation.
    side: +1 or -1, the player to move.
    Returns: the chosen move as a tuple via game_spec.flat_move_to_tuple.

    Improvement: removed commented-out dead code (the unused deterministic
    alternative).
    NOTE(review): the MCTS branch searches with the *current* network's
    input_layer/output_layer rather than net[0]/net[1] — confirm this is
    intentional for historical play.
    """
    if mcts:
        _, move = monte_carlo_tree_search(game_spec, board_state, side, 27,
                                          session, input_layer, output_layer,
                                          True, cnn_on, True)
    else:
        move = get_stochastic_network_move(session, net[0], net[1],
                                           board_state, side,
                                           valid_only=True, game_spec=game_spec,
                                           cnn_on=cnn_on)
    # move must be an array; mcts doesn't return one.
    move_for_game = np.asarray(move)
    return game_spec.flat_move_to_tuple(move_for_game.argmax())
def test_move(self):
    """Smoke-test a single step of the Monte-Carlo tree search.

    Builds a board with 10 random moves, prints it, runs one Monte-Carlo
    sample and a 100-sample tree search, printing the results. No assertions
    are made — this only checks the calls run without error.
    """
    # Test single move of monte carlo tree search algorithm
    game_spec = ut.UltimateTicTacToeGameSpec()
    # generate board with 10 random moves
    random_func = game_spec.get_random_player_func()
    board_state = ut._new_board()
    side = 1
    for _ in range(10):
        move = random_func(board_state, side)
        board_state = game_spec.apply_move(board_state, move, side)
        side = -1 * side
    print("")
    ut.print_board_state(board_state, side)
    result, move = mc._monte_carlo_sample(game_spec, board_state, side)
    print("result: ", result)
    print("move: ", move)
    # NOTE(review): mc_func is assigned but never used below — dead code,
    # unless get_monte_carlo_player_func() has a side effect; confirm.
    mc_func = game_spec.get_monte_carlo_player_func()
    result, move = mc.monte_carlo_tree_search(game_spec, board_state, side, 100)
    print(result)
    print(move)
def monte_carlo_player(self, board_state, side, uct, number_of_samples):
    """Return a move chosen by Monte-Carlo tree search.

    Uses the UCT variant when `uct` is true, plain MCTS otherwise, running
    `number_of_samples` playouts. The search's result value is discarded;
    only the move is returned.
    """
    search = mc.monte_carlo_tree_search_uct if uct else mc.monte_carlo_tree_search
    _, selected_move = search(self, board_state, side, number_of_samples)
    return selected_move