def play_game(self, game):
    if self.config.fixed_temperatures is not None:
        self.temperature = self.config.visit_softmax_temperature(self.training_step)

    while not game.terminal:
        # Expand the root from the current (optionally normalized) observation.
        root = Node(0)
        current_observation = np.float32(game.get_observation(-1))
        if self.config.norm_obs:
            current_observation = (current_observation - self.obs_min) / self.obs_range
        current_observation = torch.from_numpy(current_observation).to(self.device)

        initial_inference = self.network.initial_inference(current_observation.unsqueeze(0))
        legal_actions = game.environment.legal_actions()
        root.expand(initial_inference, game.to_play, legal_actions)
        root.add_exploration_noise(self.config.root_dirichlet_alpha,
                                   self.config.root_exploration_fraction)

        # Run MCTS and track the gap between the searched and predicted root value.
        self.mcts.run(root, self.network)
        error = root.value() - initial_inference.value.item()
        game.history.errors.append(error)

        action = self.config.select_action(root, self.temperature)
        game.apply(action)
        game.store_search_statistics(root)

        self.experiences_collected += 1
        if self.experiences_collected % self.config.weight_sync_frequency == 0:
            self.sync_weights()

        # Periodically ship a slice of history (with unroll/TD overlap) to the replay buffer.
        save_history = (game.history_idx - game.previous_collect_to) == self.config.max_history_length
        if save_history or game.done or game.terminal:
            overlap = self.config.num_unroll_steps + self.config.td_steps
            if not game.history.dones[game.previous_collect_to - 1]:
                collect_from = max(0, game.previous_collect_to - overlap)
            else:
                collect_from = game.previous_collect_to
            history = game.get_history_sequence(collect_from)
            ignore = overlap if not game.done else None
            self.replay_buffer.save_history.remote(history, ignore=ignore, terminal=game.terminal)

        if game.step >= self.config.max_steps:
            self.environment.was_real_done = True
            break

    if self.config.two_players:
        self.stats_to_log[game.info["result"]] += 1
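# Usage sketch (hypothetical, not from this codebase): a self-play actor could
# drive the method above with a loop like the one below. `new_game()` and
# `training_steps` are assumed names; only `play_game`, `self.environment`,
# and `self.training_step` appear in the original code.
def run_actor_loop(self):
    while self.training_step < self.config.training_steps:
        game = self.config.new_game(self.environment)  # fresh episode
        self.play_game(game)                           # streams history to the replay buffer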
def play_game(config: MuZeroConfig, network: Network) -> Game:
    game = Game.from_config(config)

    while not game.terminal() and len(game.history) < config.max_moves:
        # At the root of the search tree we use the representation function to
        # obtain a hidden state given the current observation.
        root = Node(0)
        last_observation = game.make_image(-1)
        root.expand(game.to_play(), game.legal_actions(),
                    network.initial_inference(last_observation).numpy())
        root.add_exploration_noise(config)
        # logging.debug('Running MCTS on step {}.'.format(len(game.history)))

        # We then run a Monte Carlo Tree Search using only action sequences and the
        # model learned by the network.
        run_mcts(config, root, game.action_history(), network)
        action = root.select_action(config, len(game.history), network)
        game.apply(action)
        game.store_search_statistics(root)

    logging.info('Finished episode at step {} | cumulative reward: {}'
                 .format(len(game.obs_history), sum(game.rewards)))
    return game
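# Usage sketch (not part of the excerpt above): in the MuZero pseudocode,
# play_game is driven by a self-play loop that repeatedly fetches the latest
# network and stores finished games in a replay buffer. SharedStorage.latest_network()
# and ReplayBuffer.save_game() are assumed to match that pseudocode; adapt the
# names if this codebase differs.
def run_selfplay(config: MuZeroConfig, storage: SharedStorage,
                 replay_buffer: ReplayBuffer):
    while True:
        network = storage.latest_network()   # most recent training checkpoint
        game = play_game(config, network)    # generate one full episode
        replay_buffer.save_game(game)        # hand the episode to the learner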
def play_game(self, environment):
    assert self.network is not None, ".load_network() needs to be called before playing."
    game = self.config.new_game(environment)

    if self.config.save_mcts:
        path_to_mcts_folder = os.path.split(os.path.normpath(self.config.saves_dir))[0]
        path_to_mcts_folder = os.path.join(path_to_mcts_folder, 'mcts')
        os.makedirs(path_to_mcts_folder, exist_ok=True)

    if self.config.save_gif_as:
        path_to_gif_folder = os.path.split(os.path.normpath(self.config.saves_dir))[0]
        path_to_gif_folder = os.path.join(path_to_gif_folder, 'gifs')
        os.makedirs(path_to_gif_folder, exist_ok=True)
    frames = []

    game.pred_values = []
    game.pred_rewards = []
    game.search_depths = []

    while not game.terminal:
        # Expand the root from the current (optionally normalized) observation.
        root = Node(0)
        current_observation = np.float32(game.get_observation(-1))
        if self.config.norm_obs:
            current_observation = (current_observation - self.obs_min) / self.obs_range
        current_observation = torch.from_numpy(current_observation).to(self.device)

        initial_inference = self.network.initial_inference(current_observation.unsqueeze(0))
        legal_actions = game.environment.legal_actions()
        root.expand(initial_inference, game.to_play, legal_actions)
        if self.config.use_exploration_noise:
            root.add_exploration_noise(self.config.root_dirichlet_alpha,
                                       self.config.root_exploration_fraction)

        actions_to_apply, corresponding_rewards = [], []
        if self.config.only_prior:
            # Act greedily with respect to the prior policy only.
            _, action = max([(child.prior, action) for action, child in root.children.items()])
            reward = self.network.recurrent_inference(root.hidden_state, [action]).reward.item()
            actions_to_apply.append(action)
            corresponding_rewards.append(reward)
            root.children[action].visit_count += 1
            game.search_depths.append([0])
        elif self.config.only_value:
            # Act greedily with respect to one-step lookahead values only.
            q_values = []
            max_q_val = -np.inf
            for action in root.children.keys():
                output = self.network.recurrent_inference(root.hidden_state, [action])
                if self.config.two_players:
                    q_val = (output.reward - self.config.discount * output.value).item()
                else:
                    q_val = (output.reward + self.config.discount * output.value).item()
                if q_val > max_q_val:
                    max_q_val = q_val
                    chosen_action = action
                    reward = output.reward.item()
                root.children[action].visit_count += 1
            actions_to_apply.append(chosen_action)
            corresponding_rewards.append(reward)
            game.search_depths.append([1])
        else:
            # Full MCTS: run the search, then walk down the tree to pick actions.
            search_paths = self.mcts.run(root, self.network)

            search_depths = [len(search_path) for search_path in search_paths]
            game.search_depths.append(search_depths)

            if self.config.save_mcts and game.step >= self.config.save_mcts_after_step:
                path_to_file = os.path.join(path_to_mcts_folder, str(game.step) + '.png')
                write_mcts_as_png(search_paths, path_to_file=path_to_file)

            node = root
            actions_applied = 0
            while node.expanded():
                action = self.config.select_action(node, temperature=self.config.temperature)
                reward = node.children[action].reward
                node = node.children[action]
                actions_to_apply.append(action)
                corresponding_rewards.append(reward)
                actions_applied += 1
                if actions_applied == self.config.apply_mcts_actions:
                    break

        game.pred_values.append(initial_inference.value.item())
        game.store_search_statistics(root)

        for action, reward in zip(actions_to_apply, corresponding_rewards):
            game.pred_rewards.append(reward)

            if self.config.two_players:
                # Optionally let a random or human opponent override the action.
                if game.to_play == self.config.random_opp:
                    action = np.random.choice(legal_actions)
                elif game.to_play == self.config.human_opp:
                    print("waiting for your input: {}".format(legal_actions))
                    action = int(input())
                    while action not in legal_actions:
                        print("invalid action, choose again!")
                        action = int(input())
                to_play = game.to_play

            game.apply(action)

            if self.config.verbose:
                prior_policy = [round(child.prior, 2) for child in root.children.values()]
                sum_visits = sum(child.visit_count for child in root.children.values())
                mcts_policy = [round(child.visit_count / sum_visits, 2) for child in root.children.values()]
                print("\nstep {}".format(game.step))
                print("  legal actions: {}".format(list(legal_actions)))
                print("  prior policy: {}".format(prior_policy))
                print("  mcts policy: {}".format(mcts_policy))
                print("  prior value: {}".format(round(game.pred_values[-1], 2)))
                print("  mcts value: {}".format(round(root.value(), 2)))

            if self.config.render:
                try:
                    frame = game.environment.unwrapped._get_image()
                    self.viewer.imshow(frame)
                except:
                    frame = game.environment.render(mode='rgb_array')
                frames.append(frame)
                if self.config.sleep:
                    time.sleep(self.config.sleep)

            if game.terminal or game.step >= self.config.max_steps:
                environment.was_real_done = True
                game.terminal = True
                if self.config.two_players:
                    if to_play in [self.config.random_opp, self.config.human_opp]:
                        game.history.rewards[-1] *= -1
                break

    msg = "\033[92m[Game done]\033[0m --> "
    msg += "length: {:.1f}, return: {:.1f}, pred return: {:.1f}, pred value: {:.1f}, mcts value: {:.1f}"
    print(msg.format(game.step, np.sum(game.history.rewards), np.sum(game.pred_rewards),
                     np.mean(game.pred_values), np.mean(game.history.root_values)))

    if self.config.save_gif_as and frames:
        filename = self.config.save_gif_as + '.gif'
        self.save_frames_as_gif(frames, path_to_gif_folder, filename)

    return game
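# Usage sketch (hypothetical): evaluating a loaded network over several episodes
# with the method above and reporting the mean return. `num_episodes` is an
# assumed parameter; `game.history.rewards` and `np` come from the code above.
def evaluate(self, environment, num_episodes=10):
    returns = []
    for _ in range(num_episodes):
        game = self.play_game(environment)
        returns.append(float(np.sum(game.history.rewards)))
    return float(np.mean(returns))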