def play_game(config: MuZeroConfig, network: AbstractNetwork, train: bool = True) -> AbstractGame:
    """
    Each game is produced by starting at the initial board position, then
    repeatedly executing a Monte Carlo Tree Search to generate moves until
    the end of the game is reached.
    """
    game = config.new_game()
    mode_action_select = 'softmax' if train else 'max'

    while not game.terminal() and len(game.history) < config.max_moves:
        # At the root of the search tree we use the representation function
        # to obtain a hidden state given the current observation.
        root = Node(0)
        current_observation = game.make_image(-1)
        expand_node(root, game.to_play(), game.legal_actions(),
                    network.initial_inference(current_observation))
        if train:
            add_exploration_noise(config, root)

        # We then run a Monte Carlo Tree Search using only action sequences
        # and the model learned by the networks.
        run_mcts(config, root, game.action_history(), network)
        action = select_action(config, len(game.history), root, network,
                               mode=mode_action_select)
        game.apply(action)
        game.store_search_statistics(root)
    return game
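# For context, a minimal sketch of the self-play loop that drives play_game,
# following the structure of DeepMind's published MuZero pseudocode. The
# SharedStorage and ReplayBuffer interfaces (latest_network, save_game) are
# assumptions taken from that pseudocode, not definitions from this module.
def run_selfplay(config: MuZeroConfig, storage: SharedStorage,
                 replay_buffer: ReplayBuffer):
    # Each self-play job keeps producing games with the freshest network
    # weights and saves them to the shared replay buffer for training.
    while True:
        network = storage.latest_network()
        game = play_game(config, network)
        replay_buffer.save_game(game)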
def play_game(config: MuZeroConfig, storage: SharedStorage, train: bool = True,
              visual: bool = False, queue: Queue = None) -> AbstractGame:
    """
    Each game is produced by starting at the initial board position, then
    repeatedly executing a Monte Carlo Tree Search to generate moves until
    the end of the game is reached.
    """
    if queue:
        network = storage.latest_network_for_process()
    else:
        network = storage.current_network

    start = time()
    game = config.new_game()
    mode_action_select = 'softmax' if train else 'max'

    while not game.terminal() and len(game.history) < config.max_moves:
        # At the root of the search tree we use the representation function
        # to obtain a hidden state given the current observation.
        root = Node(0)
        current_observation = game.make_image(-1)
        expand_node(root, game.to_play(), game.legal_actions(),
                    network.initial_inference(current_observation))
        if train:
            add_exploration_noise(config, root)

        # We then run a Monte Carlo Tree Search using only action sequences
        # and the model learned by the networks.
        run_mcts(config, root, game.action_history(), network)
        action = select_action(config, len(game.history), root, network,
                               mode=mode_action_select)
        game.apply(action)
        game.store_search_statistics(root)
        if visual:
            game.env.render()

    if visual:
        if game.terminal():
            print('Model lost game')
        else:
            print('Exceeded max moves')
        game.env.close()

    if queue:
        queue.put(game)

    print(f"Finished game episode after {time() - start} seconds. "
          f"Exceeded max moves? {not game.terminal()}")
    print("Score: ", sum(game.rewards))
    return game
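# A minimal usage sketch for the multiprocess variant above, assuming the
# standard multiprocessing Process and Queue: each worker plays one game and
# reports it back through the queue, so the parent can collect finished games
# from several workers at once. collect_games and the process-spawning details
# are an illustration, not this repository's actual launcher, and they assume
# SharedStorage can be shared safely across processes.
from multiprocessing import Process, Queue

def collect_games(config: MuZeroConfig, storage: SharedStorage, num_workers: int):
    queue = Queue()
    workers = [Process(target=play_game, args=(config, storage),
                       kwargs={'train': True, 'queue': queue})
               for _ in range(num_workers)]
    for worker in workers:
        worker.start()
    # Drain the queue before joining: a worker that has put a large game on
    # the queue cannot exit until that item has been consumed.
    games = [queue.get() for _ in workers]
    for worker in workers:
        worker.join()
    return games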