def _init_bot(bot_type, game, player_id): """Initializes a bot by type.""" rng = np.random.RandomState(FLAGS.seed) if bot_type == "mcts": evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count, rng) return mcts.MCTSBot( game, FLAGS.uct_c, FLAGS.max_simulations, evaluator, random_state=rng, solve=FLAGS.solve, verbose=FLAGS.verbose) if bot_type == "az": model = az_model.Model.from_checkpoint(FLAGS.az_path) evaluator = az_evaluator.AlphaZeroEvaluator(game, model) return mcts.MCTSBot( game, FLAGS.uct_c, FLAGS.max_simulations, evaluator, random_state=rng, child_selection_fn=mcts.SearchNode.puct_value, solve=FLAGS.solve, verbose=FLAGS.verbose) if bot_type == "random": return uniform_random.UniformRandomBot(player_id, rng) if bot_type == "human": return human.HumanBot() if bot_type == "gtp": bot = gtp.GTPBot(game, FLAGS.gtp_path) for cmd in FLAGS.gtp_cmd: bot.gtp_cmd(cmd) return bot raise ValueError("Invalid bot type: %s" % bot_type)
def test_can_play_tic_tac_toe(self): game = pyspiel.load_game("tic_tac_toe") max_simulations = 100 evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20) bots = [ mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), mcts.MCTSBot(game, UCT_C, max_simulations, evaluator), ] v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) self.assertEqual(v[0] + v[1], 0)
def test_can_play_three_player_stochastic_games(self):
  game = pyspiel.load_game("pig(players=3,winscore=20,horizon=30)")
  max_simulations = 100
  evaluator = mcts.RandomRolloutEvaluator(n_rollouts=5)
  bots = [
      mcts.MCTSBot(game, UCT_C, max_simulations, evaluator),
      mcts.MCTSBot(game, UCT_C, max_simulations, evaluator),
      mcts.MCTSBot(game, UCT_C, max_simulations, evaluator),
  ]
  v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random)
  self.assertEqual(sum(v), 0)
def test_can_play_tic_tac_toe(self): game = pyspiel.load_game("tic_tac_toe") uct_c = math.sqrt(2) max_search_nodes = 100 evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20) bots = [ mcts.MCTSBot(game, 0, uct_c, max_search_nodes, evaluator), mcts.MCTSBot(game, 1, uct_c, max_search_nodes, evaluator), ] v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random) self.assertEqual(v[0] + v[1], 0)
def test_can_play_three_player_game(self):
  game = pyspiel.load_game("pig(players=3,winscore=20,horizon=30)")
  uct_c = math.sqrt(2)
  max_search_nodes = 100
  evaluator = mcts.RandomRolloutEvaluator(n_rollouts=5)
  bots = [
      mcts.MCTSBot(game, 0, uct_c, max_search_nodes, evaluator),
      mcts.MCTSBot(game, 1, uct_c, max_search_nodes, evaluator),
      mcts.MCTSBot(game, 2, uct_c, max_search_nodes, evaluator),
  ]
  v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random)
  self.assertEqual(sum(v), 0)
def test_zero_vs_mcts(policy_fn, max_search_nodes, game_name, **kwargs):
  game = pyspiel.load_game(game_name)
  # AlphaZero first
  zero_bot = AlphaZeroBot(
      game, 0, policy_fn=policy_fn, use_dirichlet=False, **kwargs)
  mcts_bot = mcts.MCTSBot(game, 1, 1, max_search_nodes,
                          mcts.RandomRolloutEvaluator(1))
  score1 = play_game(game, zero_bot, mcts_bot)
  # MCTS bot first
  zero_bot = AlphaZeroBot(
      game, 1, policy_fn=policy_fn, use_dirichlet=False, **kwargs)
  mcts_bot = mcts.MCTSBot(game, 0, 1, max_search_nodes,
                          mcts.RandomRolloutEvaluator(1))
  score2 = -play_game(game, mcts_bot, zero_bot)
  return score1, score2, None
def test_net_vs_mcts(policy_fn, max_search_nodes, game_name, **kwargs):
  game = pyspiel.load_game(game_name)
  # AlphaZero first
  zero_bot = NeuralNetBot(game, 0, policy_fn)
  mcts_bot = mcts.MCTSBot(game, 1, 1, max_search_nodes,
                          mcts.RandomRolloutEvaluator(1))
  score1 = play_game(game, zero_bot, mcts_bot)
  # MCTS bot first
  zero_bot = NeuralNetBot(game, 1, policy_fn)
  mcts_bot = mcts.MCTSBot(game, 0, 1, max_search_nodes,
                          mcts.RandomRolloutEvaluator(1))
  score2 = -play_game(game, mcts_bot, zero_bot)
  return score1, score2, None
def test_can_play_single_player(self):
  game = pyspiel.load_game("catch")
  max_simulations = 100
  evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20)
  bots = [mcts.MCTSBot(game, UCT_C, max_simulations, evaluator)]
  v = evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random)
  self.assertGreater(v[0], 0)
def new_mcts_bot(game, max_sims, evaluator):
  if max_sims < 2:
    raise RuntimeError(
        'max_sims must be > 1 ... I think the implementation is broken')
  return mcts.MCTSBot(
      game,
      uct_c=math.sqrt(2),
      max_simulations=max_sims,
      evaluator=evaluator)
def test_works_with_mcts(self):
  game = pyspiel.load_game("tic_tac_toe")
  model = build_model(game)
  evaluator = evaluator_lib.AlphaZeroEvaluator(game, model)
  bot = mcts.MCTSBot(
      game, 1., 20, evaluator, solve=False, dirichlet_noise=(0.25, 1.))
  root = bot.mcts_search(game.new_initial_state())
  self.assertEqual(root.explore_count, 20)
def test_throws_on_simultaneous_game(self):
  game = pyspiel.load_game("matrix_mp")
  evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20)
  with self.assertRaises(ValueError):
    mcts.MCTSBot(game, UCT_C, max_simulations=100, evaluator=evaluator)
def main(_): game = pyspiel.load_game("tic_tac_toe") # 1. Define a model model = model_lib.Model( FLAGS.nn_model, game.observation_tensor_shape(), game.num_distinct_actions(), nn_width=FLAGS.nn_width, nn_depth=FLAGS.nn_depth, weight_decay=1e-4, learning_rate=0.01, path=None) print("Model type: {}({}, {}), size: {} variables".format( FLAGS.nn_model, FLAGS.nn_width, FLAGS.nn_depth, model.num_trainable_variables)) # 2. Create an MCTS bot using the model evaluator = evaluator_lib.AlphaZeroEvaluator(game, model) bot = mcts.MCTSBot(game, 1., 20, evaluator, solve=False, dirichlet_noise=(0.25, 1.)) # 3. Build an AlphaZero instance a0 = alpha_zero.AlphaZero(game, bot, model, replay_buffer_capacity=FLAGS.replay_buffer_capacity, action_selection_transition=4) # 4. Create a bot using min-max search. It can never lose tic-tac-toe, so # a success condition for our AlphaZero bot is to draw all games with it. minimax_bot = MinimaxBot(game) # 5. Run training loop for num_round in range(FLAGS.num_rounds): logging.info("------------- Starting round %s out of %s -------------", num_round, FLAGS.num_rounds) if num_round % FLAGS.evaluation_frequency == 0: num_evaluations = 50 logging.info("Playing %s games against the minimax player.", num_evaluations) (_, losses, draws) = bot_evaluation(game, [minimax_bot, a0.bot], num_evaluations=50) logging.info("Result against Minimax player: %s losses and %s draws.", losses, draws) logging.info("Running %s games of self play", FLAGS.num_self_play_games) a0.self_play(num_self_play_games=FLAGS.num_self_play_games) logging.info("Training the net for %s epochs.", FLAGS.num_training_epochs) a0.update(FLAGS.num_training_epochs, batch_size=FLAGS.batch_size, verbose=True) logging.info("Cache: %s", evaluator.cache_info()) evaluator.clear_cache()
def main(unused_argv):
  uct_c = 2

  game = pyspiel.load_game(FLAGS.game)
  state = game.new_initial_state()
  print("Initial state: ", str(state))

  # Check that the game satisfies the conditions for the implemented MCTS
  # algorithm.
  if game.num_players() not in (1, 2):
    raise ValueError("Game must be a 1-player game or 2-player zero-sum game")
  if (game.num_players() == 2 and
      game.get_type().utility != pyspiel.GameType.Utility.ZERO_SUM):
    raise ValueError("Game must be a 1-player game or 2-player zero-sum game")

  # Create MCTS bot
  evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count)
  mcts_bot = mcts.MCTSBot(game, FLAGS.mcts_player, uct_c,
                          FLAGS.max_search_nodes, evaluator)

  # Create random bot
  random_bot = pyspiel.make_uniform_random_bot(game, 1 - FLAGS.mcts_player,
                                               123)

  if FLAGS.mcts_player == 0:
    bots = [mcts_bot, random_bot]
  else:
    bots = [random_bot, mcts_bot]

  while not state.is_terminal():
    # The state can be three different types: chance node,
    # simultaneous node, or decision node
    if state.is_chance_node():
      # Chance node: sample an outcome
      outcomes = state.chance_outcomes()
      num_actions = len(outcomes)
      print("Chance node, got " + str(num_actions) + " outcomes")
      action_list, prob_list = zip(*outcomes)
      action = np.random.choice(action_list, p=prob_list)
      print("Sampled outcome: ",
            state.action_to_string(state.current_player(), action))
      state.apply_action(action)
    elif state.is_simultaneous_node():
      raise ValueError("Game cannot have simultaneous nodes.")
    else:
      # Decision node: let the current player's bot choose an action
      _, action = bots[state.current_player()].step(state)
      print("Player ", state.current_player(), ", chose action: ",
            state.action_to_string(state.current_player(), action))
      state.apply_action(action)

    print("Next state: ", str(state))

  # Game is now done. Print return for each player
  returns = state.returns()
  for pid in range(game.num_players()):
    print("Return for player {} is {}".format(pid, returns[pid]))
def _init_bot(bot_type, game, player_id): """Initializes a bot by type.""" if bot_type == "mcts": evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count) return mcts.MCTSBot(game, player_id, FLAGS.uct_c, FLAGS.max_search_nodes, evaluator) if bot_type == "random": return uniform_random.UniformRandomBot(game, player_id, np.random) if bot_type == "human": return human.HumanBot(game, player_id) raise ValueError("Invalid bot type: %s" % bot_type)
def local_random_vs_mcts():
  print("local_random_vs_mcts")
  game = pyspiel.load_game("tic_tac_toe")
  b1 = uniform_random.UniformRandomBot(0, np.random.RandomState())
  b2 = mcts.MCTSBot(
      game,
      uct_c=math.sqrt(2),
      # starts beating the random bot at ~3 sims, 1 rollout
      max_simulations=3,
      evaluator=mcts.RandomRolloutEvaluator(n_rollouts=2))
  print_games_per_second(game, b1, b2, time_limit_s=3)
def _init_bot(config, game, evaluator_, evaluation):
  """Initializes a bot."""
  noise = None if evaluation else (config.policy_epsilon, config.policy_alpha)
  return mcts.MCTSBot(
      game,
      config.uct_c,
      config.max_simulations,
      evaluator_,
      solve=False,
      dirichlet_noise=noise,
      child_selection_fn=mcts.SearchNode.puct_value,
      verbose=False)
def __init__(self, rollout_count=5, max_simulations=4000):
  rng = np.random.RandomState(None)
  evaluator = mcts.RandomRolloutEvaluator(rollout_count, rng)
  self.game = pyspiel.load_game("geodesic_y")
  self.state = self.game.new_initial_state()
  self.agent = mcts.MCTSBot(
      self.game,
      2,
      max_simulations,
      evaluator,
      random_state=rng,
      solve=True,
      verbose=False)
def search_tic_tac_toe_state(initial_actions): game = pyspiel.load_game("tic_tac_toe") state = game.new_initial_state() for action_str in initial_actions.split(" "): state.apply_action(_get_action(state, action_str)) bot = mcts.MCTSBot(game, player=state.current_player(), uct_c=math.sqrt(2), max_simulations=10000, solve=True, evaluator=mcts.RandomRolloutEvaluator(n_rollouts=20)) return bot.mcts_search(state), state
def search_tic_tac_toe_state(initial_actions): game = pyspiel.load_game("tic_tac_toe") state = game.new_initial_state() for action_str in initial_actions.split(" "): state.apply_action(_get_action(state, action_str)) rng = np.random.RandomState(42) bot = mcts.MCTSBot(game, UCT_C, max_simulations=10000, solve=True, random_state=rng, evaluator=mcts.RandomRolloutEvaluator(n_rollouts=20, random_state=rng)) return bot.mcts_search(state), state
def test_mcts_vs_random_game(self):
  server = self._start_game_server("tcp://*:5555")
  game = NetworkGame("tcp://localhost:5555")
  mcts_bot = mcts.MCTSBot(
      game,
      uct_c=math.sqrt(2),
      max_simulations=2,
      evaluator=mcts.RandomRolloutEvaluator(n_rollouts=1))
  random_bot = uniform_random.UniformRandomBot(0, np.random.RandomState())
  self._play_one_game(game, mcts_bot, random_bot)
  game.exit()
  server.join()
def random_vs_remote_mcts():
  print("random_vs_remote_mcts")
  server = start_game_server("tcp://*:5555")
  game = NetworkGame("tcp://localhost:5555")
  random_bot = uniform_random.UniformRandomBot(0, np.random.RandomState())
  mcts_bot = mcts.MCTSBot(
      game,
      uct_c=math.sqrt(2),
      # starts beating the random bot at ~3 sims, 1 rollout
      max_simulations=3,
      evaluator=mcts.RandomRolloutEvaluator(n_rollouts=2))
  print_games_per_second(game, random_bot, mcts_bot, time_limit_s=3)
  game.exit()
  server.join()
def main(): game = NetworkGame("tcp://localhost:5555") # bot = uniform_random.UniformRandomBot(0, np.random.RandomState()) bot = mcts.MCTSBot(game, uct_c=math.sqrt(2), max_simulations=3, evaluator=mcts.RandomRolloutEvaluator(n_rollouts=2)) state = game.new_initial_state() while not state.is_terminal(): action = bot.step(state) print('bot action:', action) state.apply_action(action) game.exit() print("done") print(state)
def evaluator(*, game, config, logger, checkpoint, queue):
  """A process that plays the latest checkpoint vs standard MCTS."""
  results = Buffer(config.evaluation_window)
  logger.print("Initializing model")
  # Load a fresh model if there's no checkpoint, otherwise load the checkpoint.
  if checkpoint is None:
    model = _init_model_from_config(config)
  else:
    model = _init_model_from_checkpoint(checkpoint, config.path)
  logger.print("Initializing bots")
  az_evaluator = evaluator_lib.AlphaZeroEvaluator(game, model)
  random_evaluator = mcts.RandomRolloutEvaluator()

  for game_num in itertools.count():
    if not update_checkpoint(logger, queue, model, az_evaluator):
      return

    az_player = game_num % 2
    difficulty = (game_num // 2) % config.eval_levels
    max_simulations = int(config.max_simulations * (10**(difficulty / 2)))
    bots = [
        _init_bot(config, game, az_evaluator, True),
        mcts.MCTSBot(
            game,
            config.uct_c,
            max_simulations,
            random_evaluator,
            solve=True,
            verbose=False)
    ]
    if az_player == 1:
      bots = list(reversed(bots))

    trajectory = _play_game(logger, game_num, game, bots, temperature=1,
                            temperature_drop=0)
    results.append(trajectory.returns[az_player])
    queue.put((difficulty, trajectory.returns[az_player]))

    logger.print("AZ: {}, MCTS: {}, AZ avg/{}: {:.3f}".format(
        trajectory.returns[az_player],
        trajectory.returns[1 - az_player],
        len(results), np.mean(results.data)))
def _init_bot(bot_type, game, player_id): """Initializes a bot by type.""" rng = np.random.RandomState(FLAGS.seed) if bot_type == "mcts": evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count, rng) return mcts.MCTSBot( game, FLAGS.uct_c, FLAGS.max_simulations, evaluator, random_state=rng, solve=FLAGS.solve, verbose=FLAGS.verbose) if bot_type == "random": return uniform_random.UniformRandomBot(player_id, rng) if bot_type == "human": return human.HumanBot() raise ValueError("Invalid bot type: %s" % bot_type)
def evaluator(*, game, config, logger, num, queue):
  """A process that plays the latest checkpoint vs standard MCTS."""
  max_simulations = config.max_simulations * (3**num)
  logger.print("Running MCTS with", max_simulations, "simulations")
  results = Buffer(config.evaluation_window)
  logger.print("Initializing model")
  model = _init_model_from_config(config)
  logger.print("Initializing bots")
  az_evaluator = evaluator_lib.AlphaZeroEvaluator(game, model)
  random_evaluator = mcts.RandomRolloutEvaluator()
  az_player = 0
  bots = [
      _init_bot(config, game, az_evaluator, True),
      mcts.MCTSBot(
          game,
          config.uct_c,
          max_simulations,
          random_evaluator,
          solve=True,
          verbose=False)
  ]

  for game_num in itertools.count():
    if not update_checkpoint(logger, queue, model, az_evaluator):
      return

    trajectory = _play_game(logger, game_num, game, bots, temperature=1,
                            temperature_drop=0)
    results.append(trajectory.returns[az_player])
    logger.print("AZ: {}, MCTS: {}, AZ avg/{}: {:.3f}".format(
        trajectory.returns[az_player],
        trajectory.returns[1 - az_player],
        len(results), np.mean(results.data)))

    # Swap players for the next game
    bots = list(reversed(bots))
    az_player = 1 - az_player
def main(_): game = pyspiel.load_game("tic_tac_toe") num_actions = game.num_distinct_actions() observation_shape = game.observation_tensor_shape() # 1. Define a keras net if FLAGS.net_type == "resnet": net = alpha_zero.keras_resnet( observation_shape, num_actions, num_residual_blocks=1, num_filters=256, data_format="channels_first") elif FLAGS.net_type == "mlp": net = alpha_zero.keras_mlp( observation_shape, num_actions, num_layers=2, num_hidden=64) else: raise ValueError(("Invalid value for 'net_type'. Must be either 'mlp' or " "'resnet', but was %s") % FLAGS.net_type) model = alpha_zero.Model( net, l2_regularization=1e-4, learning_rate=0.01, device=FLAGS.device) # 2. Create an MCTS bot using the previous keras net evaluator = alpha_zero.AlphaZeroKerasEvaluator(game, model) bot = mcts.MCTSBot(game, 1., 20, evaluator, solve=False, dirichlet_noise=(0.25, 1.)) # 3. Build an AlphaZero instance a0 = alpha_zero.AlphaZero(game, bot, model, replay_buffer_capacity=FLAGS.replay_buffer_capacity, action_selection_transition=4) # 4. Create a bot using min-max search. It can never lose tic-tac-toe, so # a success condition for our AlphaZero bot is to draw all games with it. minimax_bot = MinimaxBot(game) # 5. Run training loop for num_round in range(FLAGS.num_rounds): logging.info("------------- Starting round %s out of %s -------------", num_round, FLAGS.num_rounds) if num_round % FLAGS.evaluation_frequency == 0: num_evaluations = 50 logging.info("Playing %s games against the minimax player.", num_evaluations) (_, losses, draws) = bot_evaluation(game, [minimax_bot, a0.bot], num_evaluations=50) logging.info("Result against Minimax player: %s losses and %s draws.", losses, draws) logging.info("Running %s games of self play", FLAGS.num_self_play_games) a0.self_play(num_self_play_games=FLAGS.num_self_play_games) logging.info("Training the net for %s epochs.", FLAGS.num_training_epochs) a0.update(FLAGS.num_training_epochs, batch_size=FLAGS.batch_size, verbose=True) evaluator.value_and_prior.cache_clear()
def main(_): game = pyspiel.load_game("tic_tac_toe") num_actions = game.num_distinct_actions() observation_shape = game.observation_tensor_shape() # 1. Define a keras net if FLAGS.net_type == "resnet": feature_extractor = None net = alpha_zero.keras_resnet(observation_shape, num_actions=num_actions, num_residual_blocks=1, num_filters=256, data_format="channels_first") elif FLAGS.net_type == "mlp": # The full length-27 observation_tensor could be trained on. But this # demonstrates the use of a custom feature extractor, and the training # can be faster with this smaller feature representation. feature_extractor = mlp_feature_extractor net = alpha_zero.keras_mlp(9, num_actions, num_layers=2, num_hidden=64) else: raise ValueError( ("Invalid value for 'net_type'. Must be either 'mlp' or " "'resnet', but was %s") % FLAGS.net_type) # 2. Create an MCTS bot using the previous keras net evaluator = alpha_zero.AlphaZeroKerasEvaluator( net, optimizer=tf.train.AdamOptimizer(learning_rate=0.01), device=FLAGS.device, feature_extractor=feature_extractor) bot = mcts.MCTSBot(game, 1., 20, evaluator, solve=False, dirichlet_noise=(0.25, 1.)) # 3. Build an AlphaZero instance a0 = alpha_zero.AlphaZero( game, bot, replay_buffer_capacity=FLAGS.replay_buffer_capacity, action_selection_transition=4) # 4. Create a bot using min-max search. It can never lose tic-tac-toe, so # a success condition for our AlphaZero bot is to draw all games with it. minimax_bot = MinimaxBot(game) # 5. Run training loop for num_round in range(FLAGS.num_rounds): logging.info("------------- Starting round %s out of %s -------------", num_round, FLAGS.num_rounds) if num_round % FLAGS.evaluation_frequency == 0: num_evaluations = 50 logging.info("Playing %s games against the minimax player.", num_evaluations) (_, losses, draws) = bot_evaluation(game, [minimax_bot, a0.bot], num_evaluations=50) logging.info( "Result against Minimax player: %s losses and %s draws.", losses, draws) logging.info("Running %s games of self play", FLAGS.num_self_play_games) a0.self_play(num_self_play_games=FLAGS.num_self_play_games) logging.info("Training the net for %s epochs.", FLAGS.num_training_epochs) a0.update(FLAGS.num_training_epochs, batch_size=FLAGS.batch_size, verbose=True)
def __init__(self, idx, max_simulations=500):
  super().__init__(idx)
  game = pyspiel.load_game("quoridor")
  self.evaluator = RandomRolloutEvaluator(2)
  self.bot = mcts.MCTSBot(game, UCT_C, max_simulations, self.evaluator)