예제 #1
0
def nfsp_measure_exploitability_nonlstm(rllib_policies: List[Policy],
                                        poker_game_version: str,
                                        open_spiel_env_config: dict = None):
    """Compute the exploitability of a set of non-LSTM RLlib policies.

    Args:
        rllib_policies: RLlib policies; each one is converted, in order, into
            an OpenSpiel policy.
        poker_game_version: Name of the OpenSpiel game to load.
        open_spiel_env_config: Optional game parameters. Values that are not
            already ``pyspiel.GameParameter`` instances are wrapped in one.
            When omitted, ``players=2`` is used for kuhn_poker and leduc_poker
            and an empty config for every other game.

    Returns:
        The value returned by ``exploitability`` for the joint policy.
    """
    if open_spiel_env_config is None:
        needs_player_count = poker_game_version in ["kuhn_poker", "leduc_poker"]
        open_spiel_env_config = (
            {"players": pyspiel.GameParameter(2)} if needs_player_count else {})

    # Normalise raw values to GameParameter; existing ones pass through as-is.
    wrapped_config = {}
    for name, value in open_spiel_env_config.items():
        if isinstance(value, pyspiel.GameParameter):
            wrapped_config[name] = value
        else:
            wrapped_config[name] = pyspiel.GameParameter(value)
    open_spiel_env_config = wrapped_config

    openspiel_game = pyspiel.load_game(poker_game_version, open_spiel_env_config)
    if poker_game_version == "oshi_zumo":
        # oshi_zumo is converted to a turn-based game before policies are built.
        openspiel_game = pyspiel.convert_to_turn_based(openspiel_game)

    opnsl_policies = [
        openspiel_policy_from_nonlstm_rllib_policy(
            openspiel_game=openspiel_game,
            rllib_policy=single_policy,
            game_version=poker_game_version,
            game_parameters=open_spiel_env_config)
        for single_policy in rllib_policies
    ]

    joint_policy = JointPlayerPolicy(game=openspiel_game, policies=opnsl_policies)

    if poker_game_version == "universal_poker":
        print("Measuring exploitability for universal_poker policy. This will take a while...")
    # Exploitability is NashConv / num_players
    return exploitability(game=openspiel_game, policy=joint_policy)
def main(argv):
  """Play FLAGS.num_deals deals between two bots and print score statistics.

  Args:
    argv: Command-line arguments; more than one element is rejected.

  Raises:
    app.UsageError: If `argv` has more than one element.
  """
  if len(argv) > 1:
    raise app.UsageError("Too many command-line arguments.")

  game_params = {
      "relative_scoring": pyspiel.GameParameter(True),
      "rng_seed": pyspiel.GameParameter(FLAGS.rng_seed),
  }
  game = pyspiel.load_game("bridge_uncontested_bidding", game_params)

  # One bot per seat (0 and 1), each driven by its own client.
  bots = [
      bluechip_bridge_uncontested_bidding.BlueChipBridgeBot(
          game, seat, _WBridge5Client(FLAGS.bot_cmd)) for seat in (0, 1)
  ]

  results = []
  for deal_index in range(FLAGS.num_deals):
    final_state = _run_once(game.new_initial_state(), bots)
    print("Deal #{}; final state:\n{}".format(deal_index, final_state))
    results.append(final_state.returns())

  stats = np.array(results)
  mean = np.mean(stats, axis=0)
  # Standard error of the mean, based on the sample standard deviation.
  sample_std = np.std(stats, axis=0, ddof=1)
  stderr = sample_std / np.sqrt(FLAGS.num_deals)
  print(u"Absolute score: {:+.1f}\u00b1{:.1f}".format(mean[0], stderr[0]))
  print(u"Relative score: {:+.1f}\u00b1{:.1f}".format(mean[1], stderr[1]))
예제 #3
0
    def test_solve_small_oshi_zumo(self):
        """Value iteration gives a root value of 0 for two small Oshi-Zumo games."""

        def root_value(params, cyclic):
            # Solve the game described by `params` and look up the value stored
            # under the string form of its initial state.
            game = pyspiel.load_game("oshi_zumo", params)
            values = value_iteration.value_iteration(game,
                                                     depth_limit=-1,
                                                     threshold=1e-6,
                                                     cyclic_game=cyclic)
            return values[str(game.new_initial_state())]

        # Oshi-Zumo(5, 2, 0), solved with cyclic_game=True.
        # Symmetric game: value is 0
        self.assertAlmostEqual(
            root_value(
                {
                    "coins": pyspiel.GameParameter(5),
                    "size": pyspiel.GameParameter(2)
                }, True), 0)

        # Oshi-Zumo(5, 2, 1), solved with cyclic_game=False.
        # Symmetric game: value is 0
        self.assertAlmostEqual(
            root_value(
                {
                    "coins": pyspiel.GameParameter(5),
                    "size": pyspiel.GameParameter(2),
                    "min_bid": pyspiel.GameParameter(1)
                }, False), 0)
 def test_game_parameters_from_string_with_options(self):
     """Parsing "foo(x=2,y=true)" yields the game name plus the x and y options."""
     parsed = pyspiel.game_parameters_from_string("foo(x=2,y=true)")
     expected = {
         "name": pyspiel.GameParameter("foo"),
         "x": pyspiel.GameParameter(2),
         "y": pyspiel.GameParameter(True)
     }
     self.assertEqual(parsed, expected)
def main(_):
  """Play one game with uniformly random actions, printing every step.

  The game named by FLAGS.game is loaded with a `num_nodes` and a `weights`
  parameter. When FLAGS.num_nodes is set, the weights are the rounded pairwise
  distances between that many random 2-D points; otherwise a 5-node game with
  all-zero weights is used. The starting state is either deserialized from the
  file named by FLAGS.load_state or the game's initial state.

  Args:
    _: Unused positional argument.
  """
  print("Creating game: " + FLAGS.game)
  if FLAGS.num_nodes is not None:
    num_nodes = FLAGS.num_nodes
    # Random points; edge weights are their pairwise distances (as computed by
    # distance_matrix) rounded to two decimals and flattened row by row.
    points = np.random.random((num_nodes, 2))
    dist_mat = np.round(distance_matrix(points, points), 2).flatten()
    weights = ",".join(str(weight) for weight in dist_mat)
  else:
    # Default game: 5 nodes with every edge weight set to zero. Defining
    # num_nodes and dist_mat here keeps the per-step printing below working
    # when the flag is not given.
    num_nodes = 5
    dist_mat = np.zeros(num_nodes * num_nodes)
    weights = ",".join(["0"] * (num_nodes * num_nodes))

  game = pyspiel.load_game(FLAGS.game, {
      "num_nodes": pyspiel.GameParameter(num_nodes),
      "weights": pyspiel.GameParameter(weights),
  })

  # Get a new state
  if FLAGS.load_state is not None:
    # Load a specific state
    with open(FLAGS.load_state, encoding="utf-8") as input_file:
      state_string = input_file.read().rstrip()
    print("Loading state:")
    print(state_string)
    print("")
    state = game.deserialize_state(state_string)
  else:
    state = game.new_initial_state()

  # Print the initial state
  print(str(state))

  while not state.is_terminal():
    # Every non-terminal state is handled the same way here: sample one legal
    # action uniformly at random for the current player.
    player = state.current_player()
    legal_actions = state.legal_actions(player)
    # Each action id is shown as a (id // num_nodes, id % num_nodes) pair.
    print("Legal Actions: ",
          [(a // num_nodes, a % num_nodes) for a in legal_actions])
    action = random.choice(legal_actions)
    action_string = state.action_to_string(player, action)
    print("Player ", player, ", randomly sampled action: ", action_string)
    state.apply_action(action)

    print(str(state))

    print("Information State: ", state.information_state_string())
    print("Edge Values: ", dist_mat)

  # Game is now done. Print utilities for each player
  returns = state.returns()
  for pid in range(game.num_players()):
    print("Utility for player {} is {}".format(pid, returns[pid]))
 def test_iigoofspiel4(self):
     """The sequence-form LP gives value 0 to both players of this goofspiel game."""
     params = {
         "imp_info": pyspiel.GameParameter(True),
         "num_cards": pyspiel.GameParameter(4),
         "points_order": pyspiel.GameParameter("descending"),
     }
     game = pyspiel.load_game_as_turn_based("goofspiel", params)
     val1, val2, _, _ = sequence_form_lp.solve_zero_sum_game(game)
     # symmetric game, should be 0
     for player_value in (val1, val2):
         self.assertAlmostEqual(player_value, 0)
예제 #7
0
 def __init__(self, scale=1, shift=0):
     """Load a fully observable coop_box_pushing_serial game with horizon 100.

     Args:
         scale: Stored unchanged on the instance as ``scale``.
         shift: Stored unchanged on the instance as ``shift``.
     """
     self.scale, self.shift = scale, shift
     game_params = {
         "fully_observable": pyspiel.GameParameter(True),
         "horizon": pyspiel.GameParameter(100)
     }
     self.game = pyspiel.load_game("coop_box_pushing_serial", game_params)
     self.state = self.game.new_initial_state()
     # The episode step limit is taken from the game's maximum length.
     self._max_episode_steps = self.game.max_game_length()
     self.env_seed = 0
예제 #8
0
파일: poker_utils.py 프로젝트: indylab/nxdo
def nxdo_nfsp_measure_exploitability_nonlstm(
        rllib_policies: List[Policy],
        use_delegate_policy_exploration: bool,
        restricted_game_convertors: Union[
            List[RestrictedToBaseGameActionSpaceConverter],
            List[AgentRestrictedGameOpenSpielObsConversions]],
        poker_game_version: str,
        open_spiel_env_config: dict = None):
    """Compute the exploitability of non-LSTM RLlib NXDO policies.

    Args:
        rllib_policies: RLlib policies, paired positionally with
            ``restricted_game_convertors``.
        use_delegate_policy_exploration: Forwarded to the policy conversion.
        restricted_game_convertors: List of converters, one per policy.
        poker_game_version: Name of the OpenSpiel game. When no config is
            given, "oshi_zumo_tiny" is loaded as "oshi_zumo" with coins=6,
            size=2 and horizon=8.
        open_spiel_env_config: Optional game parameters; values that are not
            ``pyspiel.GameParameter`` instances are wrapped in one.

    Returns:
        The value returned by ``exploitability`` for the joint policy.
    """
    if open_spiel_env_config is None:
        if poker_game_version in ["kuhn_poker", "leduc_poker"]:
            open_spiel_env_config = {"players": pyspiel.GameParameter(2)}
        elif poker_game_version == "oshi_zumo_tiny":
            # The "tiny" alias is a fixed small configuration of oshi_zumo.
            poker_game_version = "oshi_zumo"
            open_spiel_env_config = {
                "coins": pyspiel.GameParameter(6),
                "size": pyspiel.GameParameter(2),
                "horizon": pyspiel.GameParameter(8),
            }
        else:
            open_spiel_env_config = {}

    # Normalise raw values to GameParameter; existing ones pass through as-is.
    wrapped_config = {}
    for name, value in open_spiel_env_config.items():
        if isinstance(value, pyspiel.GameParameter):
            wrapped_config[name] = value
        else:
            wrapped_config[name] = pyspiel.GameParameter(value)
    open_spiel_env_config = wrapped_config

    openspiel_game = pyspiel.load_game(poker_game_version,
                                       open_spiel_env_config)

    assert isinstance(restricted_game_convertors, list)
    opnsl_policies = [
        openspiel_policy_from_nonlstm_rllib_nxdo_policy(
            openspiel_game=openspiel_game,
            rllib_policy=single_policy,
            restricted_game_convertor=converter,
            use_delegate_policy_exploration=use_delegate_policy_exploration)
        for converter, single_policy in zip(restricted_game_convertors,
                                            rllib_policies)
    ]

    joint_policy = JointPlayerPolicy(game=openspiel_game,
                                     policies=opnsl_policies)

    if poker_game_version == "universal_poker":
        print(
            "Measuring exploitability for universal_poker policy. This will take a while..."
        )
    # Exploitability is NashConv / num_players
    return exploitability(game=openspiel_game, policy=joint_policy)
예제 #9
0
 def test_passing_params(self):
     """Bots loaded with explicit "actions" params produce the result [1, -1]."""
     game = pyspiel.load_game("tic_tac_toe")
     # Action preference string for player 0 and player 1 respectively.
     preferences = ("0:1:2", "3:4")
     bots = [
         pyspiel.load_bot(
             "fixed_action_preference",
             game,
             player=seat,
             params={"actions": pyspiel.GameParameter(preference)})
         for seat, preference in enumerate(preferences)
     ]
     result = pyspiel.evaluate_bots(game.new_initial_state(), bots, seed=0)
     self.assertEqual(result, [1, -1])  # Player 0 wins.
예제 #10
0
def cfr_train(unused_arg):
    """Run CFR on FLAGS.game and persist the exploitability curve and policy.

    Every 100 iterations the exploitability of the average policy is measured,
    recorded and printed. Afterwards the recorded curve is pickled to
    "<game>_cfr_<episodes>.dat" and the average policy is written through
    ``policy_to_csv`` under "policies/".

    Args:
        unused_arg: Ignored.
    """
    exploit_history = []
    exploit_idx = []

    tf.enable_eager_execution()
    game = pyspiel.load_game(FLAGS.game, {"players": pyspiel.GameParameter(2)})
    agent_name = "cfr"
    cfr_solver = cfr.CFRSolver(game)
    checkpoint = datetime.now()
    for ep in range(FLAGS.episodes):
        cfr_solver.evaluate_and_update_policy()
        if ep % 100 == 0:
            delta = datetime.now() - checkpoint
            conv = exploitability.exploitability(game,
                                                 cfr_solver.average_policy())
            exploit_idx.append(ep)
            exploit_history.append(conv)
            print(
                "Iteration {} exploitability {} - {} seconds since last checkpoint"
                .format(ep, conv, delta.seconds))
            checkpoint = datetime.now()

    # Use a context manager so the history file is flushed and closed.
    history_path = (FLAGS.game + "_" + agent_name + "_" +
                    str(FLAGS.episodes) + ".dat")
    with open(history_path, "wb") as history_file:
        pickle.dump([exploit_idx, exploit_history], history_file)

    timestamp = datetime.now().strftime("%m-%d-%Y_%H-%M")
    policy = cfr_solver.average_policy()
    # Index of the last training iteration. Derived from the flag instead of
    # the loop variable so it is defined even when no iteration ran.
    last_ep = FLAGS.episodes - 1
    # NOTE(review): the file names carry pid + 1 (i.e. 2 and 3) and both calls
    # receive the same policy object -- confirm this numbering is intended.
    for pid in [1, 2]:
        policy_to_csv(
            game, policy,
            "policies/policy_" + timestamp + "_" + agent_name + "_" +
            str(pid + 1) + "_+" + str(last_ep) + "episodes.csv")
def nfsp_measure_exploitability_nonlstm(rllib_p0_and_p1_policies,
                                        poker_game_version):
    """Compute the exploitability of the given policies.

    Args:
        rllib_p0_and_p1_policies: Iterable of policies. Entries that are
            already ``OSPolicy`` instances are used as-is; all others are
            converted with ``openspiel_policy_from_nonlstm_rllib_policy``.
        poker_game_version: Name of the OpenSpiel game to load.

    Returns:
        The value returned by ``exploitability`` for the combined policy.
    """
    # Kuhn and Leduc poker are loaded with players=2; other games get no params.
    open_spiel_env_config = {}
    if poker_game_version in [KUHN_POKER, LEDUC_POKER]:
        open_spiel_env_config = {"players": pyspiel.GameParameter(2)}

    openspiel_game = pyspiel.load_game(poker_game_version,
                                       open_spiel_env_config)
    openspiel_env = Environment(poker_game_version, open_spiel_env_config)

    openspiel_policies = [
        candidate if isinstance(candidate, OSPolicy) else
        openspiel_policy_from_nonlstm_rllib_policy(
            openspiel_game=openspiel_game,
            poker_game_version=poker_game_version,
            rllib_policy=candidate)
        for candidate in rllib_p0_and_p1_policies
    ]

    combined_policy = NFSPPolicies(env=openspiel_env,
                                   nfsp_policies=openspiel_policies)

    # Exploitability is NashConv / num_players
    return exploitability(game=openspiel_game, policy=combined_policy)
예제 #12
0
def main(_):
    """Run MCCFR, pickle the solver halfway, reload it, and keep training.

    The first half of FLAGS.iterations is run, the solver is pickled and read
    back (printing the exploitability of the reloaded copy), and then the
    second half is run on the original solver.

    Args:
        _: Unused positional argument.
    """
    game_params = {"players": pyspiel.GameParameter(FLAGS.players)}
    game = pyspiel.load_game(FLAGS.game, game_params)

    sampling = FLAGS.sampling
    if sampling == "external":
        solver = pyspiel.ExternalSamplingMCCFRSolver(
            game,
            avg_type=pyspiel.MCCFRAverageType.FULL,
        )
    elif sampling == "outcome":
        solver = pyspiel.OutcomeSamplingMCCFRSolver(game)

    half = int(FLAGS.iterations / 2)

    for step in range(half):
        solver.run_iteration()
        current = pyspiel.exploitability(game, solver.average_policy())
        print("Iteration {} exploitability: {:.6f}".format(step, current))

    print("Persisting the model...")
    model_path = MODEL_FILE_NAME.format(FLAGS.sampling)
    with open(model_path, "wb") as file:
        pickle.dump(solver, file, pickle.HIGHEST_PROTOCOL)

    print("Loading the model...")
    with open(model_path, "rb") as file:
        loaded_solver = pickle.load(file)
    loaded_value = pyspiel.exploitability(game, loaded_solver.average_policy())
    print("Exploitability of the loaded model: {:.6f}".format(loaded_value))

    # The second half continues on the original solver; iteration numbers
    # carry on from where the first half stopped.
    for step in range(half):
        solver.run_iteration()
        current = pyspiel.exploitability(game, solver.average_policy())
        print("Iteration {} exploitability: {:.6f}".format(
            half + step, current))
예제 #13
0
    def test_create_repeated_game(self):
        """Test both create_repeated_game function signatures."""

        def play_rounds(repeated_game, rounds):
            # Apply the joint action [0, 0] `rounds` times from the start.
            state = repeated_game.new_initial_state()
            for _ in range(rounds):
                state.apply_actions([0, 0])
            return state

        # Signature 1: stage game given by name.
        from_name = pyspiel.create_repeated_game(
            "matrix_rps", {"num_repetitions": pyspiel.GameParameter(10)})
        assert play_rounds(from_name, 10).is_terminal()

        # Signature 2: stage game given as a loaded game object.
        stage_game = pyspiel.load_game("matrix_mp")
        from_instance = pyspiel.create_repeated_game(
            stage_game, {"num_repetitions": pyspiel.GameParameter(5)})
        assert play_rounds(from_instance, 5).is_terminal()
  def test_gpsro(self, game_name, rnr_iterations, sims_per_entry,
                 number_players, rectify_training, training_strategy_selector,
                 meta_strategy_method):
    """Run a GenPSRO solver for a few iterations and log its meta game.

    Args:
      game_name: Name of the game to load.
      rnr_iterations: Number of solver iterations to run.
      sims_per_entry: Forwarded to the solver.
      number_players: Value of the game's "players" parameter.
      rectify_training: Forwarded to the solver.
      training_strategy_selector: Forwarded to the solver.
      meta_strategy_method: Forwarded to the solver.
    """
    player_params = {"players": pyspiel.GameParameter(number_players)}
    game = pyspiel.load_game(game_name, player_params)
    oracle = optimization_oracle.EvolutionaryStrategyOracle(
        number_policies_sampled=2, number_episodes_sampled=2)
    solver_options = dict(
        sims_per_entry=sims_per_entry,
        rectify_training=rectify_training,
        training_strategy_selector=training_strategy_selector,
        meta_strategy_method=meta_strategy_method)
    g_psro_solver = generalized_psro.GenPSROSolver(game, oracle,
                                                   **solver_options)

    iterations_left = rnr_iterations
    while iterations_left > 0:
      g_psro_solver.iteration()
      iterations_left -= 1

    # `get_meta_game` is read as an attribute here, not called.
    meta_game = g_psro_solver.get_meta_game
    meta_probabilities = g_psro_solver.get_and_update_meta_strategies()

    logging.info("%s %sP - %s", game_name, str(number_players),
                 meta_strategy_method)
    logging.info("Meta Strategies")
    logging.info(meta_probabilities)
    logging.info("")

    logging.info("Meta Game Values")
    logging.info(meta_game)
    logging.info("")
def main(_):
    """List the registered games, then print every state of FLAGS.game.

    Args:
        _: Unused positional argument.
    """
    games_list = pyspiel.registered_games()
    print("Registered games:")
    print(games_list)

    print("Creating game: " + FLAGS.game)
    # The parameter dict is only passed when a player count was requested.
    load_args = [FLAGS.game]
    if FLAGS.players is not None:
        load_args.append({"players": pyspiel.GameParameter(FLAGS.players)})
    game = pyspiel.load_game(*load_args)

    print("Getting all states; depth_limit = {}".format(FLAGS.depth_limit))
    all_states = get_all_states.get_all_states(game, FLAGS.depth_limit,
                                               FLAGS.include_terminals,
                                               FLAGS.include_chance_states)

    count = 0
    for count, state in enumerate(all_states, start=1):
        print("")
        print(str(state))

    print("")
    print("Total: {} states.".format(count))
예제 #16
0
def main(_):
    """Print registered game names, then print every state of FLAGS.game.

    Args:
        _: Unused positional argument.
    """
    games_list = pyspiel.registered_games()
    print("Registered games:")
    for registered in games_list:
        print(" ", registered.short_name)
    print()

    print("Creating game:", FLAGS.game)
    # Only pass "players" along when the flag was provided.
    params = ({} if FLAGS.players is None else
              {"players": pyspiel.GameParameter(FLAGS.players)})
    game = pyspiel.load_game(FLAGS.game, params)

    print("Getting all states; depth_limit = {}".format(FLAGS.depth_limit))
    all_states = get_all_states.get_all_states(game, FLAGS.depth_limit,
                                               FLAGS.include_terminals,
                                               FLAGS.include_chance_states)

    count = 0
    for count, state in enumerate(all_states, start=1):
        print(state)

    print()
    print("Total: {} states.".format(count))
예제 #17
0
    def __init__(self,
                 env_name,
                 env_seed=2,
                 deltas=None,
                 slow_oracle_kargs=None,
                 fast_oracle_kargs=None):
        """Set up a two-player turn-based environment, noise table and session.

        Args:
            env_name: Name of the OpenSpiel game, loaded as turn-based with
                players=2.
            env_seed: The shared noise table is created with ``env_seed + 7``.
            deltas: Passed to ``SharedNoiseTable``.
            slow_oracle_kargs: Optional dict; when given, the session is
                stored into it under the "session" key (the dict is mutated).
            fast_oracle_kargs: Mapping that must contain a "noise" entry. It is
                indexed unconditionally, so leaving the ``None`` default fails.
        """
        # initialize rl environment.
        from open_spiel.python import rl_environment

        import pyspiel

        self._num_players = 2
        player_params = {"players": pyspiel.GameParameter(self._num_players)}
        turn_based_game = pyspiel.load_game_as_turn_based(
            env_name, player_params)
        self._env = rl_environment.Environment(turn_based_game)

        # Noise table built from `deltas`, seeded with env_seed + 7.
        self.deltas = SharedNoiseTable(deltas, env_seed + 7)

        # One (initially empty) policy list per player.
        self._policies = [[] for _ in range(self._num_players)]
        self._slow_oracle_kargs = slow_oracle_kargs
        self._fast_oracle_kargs = fast_oracle_kargs
        self._delta_std = fast_oracle_kargs['noise']

        # Reuse the default TensorFlow session when there is one.
        default_session = tf.get_default_session()
        self._sess = (tf.Session()
                      if default_session is None else default_session)

        if slow_oracle_kargs is not None:
            slow_oracle_kargs['session'] = self._sess
예제 #18
0
def xfsp_train(_):
    """Run XFP on FLAGS.game and persist the exploitability curve and policy.

    Every 1000 iterations (before the iteration itself is run) the
    exploitability of the current average policy is measured, recorded and
    printed. Afterwards the curve is pickled to "<game>_xfsp_<episodes>.dat"
    and the average policy is written through ``policy_to_csv`` under
    "policies/".

    Args:
        _: Unused positional argument.
    """
    exploit_history = []
    exploit_idx = []
    game = pyspiel.load_game(FLAGS.game, {"players": pyspiel.GameParameter(2)})
    fsp_solver = fictitious_play.XFPSolver(game)
    checkpoint = datetime.now()
    for ep in range(FLAGS.episodes):
        if (ep % 1000) == 0:
            delta = datetime.now() - checkpoint
            pol = policy.PolicyFromCallable(
                game, fsp_solver.average_policy_callable())
            conv = exploitability.exploitability(game, pol)
            exploit_history.append(conv)
            exploit_idx.append(ep)
            print(
                "[XFSP] Iteration {} exploitability {} - {} seconds since last checkpoint"
                .format(ep, conv, delta.seconds))
            checkpoint = datetime.now()

        fsp_solver.iteration()

    agent_name = "xfsp"
    # Use a context manager so the history file is flushed and closed.
    history_path = (FLAGS.game + "_" + agent_name + "_" +
                    str(FLAGS.episodes) + ".dat")
    with open(history_path, "wb") as history_file:
        pickle.dump([exploit_idx, exploit_history], history_file)

    # Timestamp for the CSV file names, taken once training has finished.
    timestamp = datetime.now().strftime("%m-%d-%Y_%H-%M")
    pol = policy.PolicyFromCallable(game, fsp_solver.average_policy_callable())
    # NOTE(review): the file names carry pid + 1 (i.e. 2 and 3) and both calls
    # receive the same policy object -- confirm this numbering is intended.
    for pid in [1, 2]:
        policy_to_csv(
            game, pol, "policies/policy_" + timestamp + "_" + agent_name +
            "_" + str(pid + 1) + "_+" + str(FLAGS.episodes) + "episodes.csv")
예제 #19
0
def main(_):
    """Run a CFR-family solver, pickle it halfway, and resume from the copy.

    The first half of FLAGS.iterations runs on a fresh solver. That solver is
    pickled and read back, and the second half runs on the reloaded solver.

    Args:
        _: Unused positional argument.
    """
    game_params = {"players": pyspiel.GameParameter(FLAGS.players)}
    game = pyspiel.load_game(FLAGS.game, game_params)

    solver_name = FLAGS.solver
    if solver_name == "cfr":
        solver = pyspiel.CFRSolver(game)
    elif solver_name == "cfrplus":
        solver = pyspiel.CFRPlusSolver(game)
    elif solver_name == "cfrbr":
        solver = pyspiel.CFRBRSolver(game)

    half = int(FLAGS.iterations / 2)

    for step in range(half):
        solver.evaluate_and_update_policy()
        current = pyspiel.exploitability(game, solver.average_policy())
        print("Iteration {} exploitability: {:.6f}".format(step, current))

    print("Persisting the model...")
    pickle_path = "{}_solver.pickle".format(FLAGS.solver)
    with open(pickle_path, "wb") as file:
        pickle.dump(solver, file, pickle.HIGHEST_PROTOCOL)

    print("Loading the model...")
    with open(pickle_path, "rb") as file:
        loaded_solver = pickle.load(file)
    loaded_value = pyspiel.exploitability(game, loaded_solver.average_policy())
    print("Exploitability of the loaded model: {:.6f}".format(loaded_value))

    # Training continues on the reloaded solver; iteration numbers carry on
    # from where the first half stopped.
    for step in range(half):
        loaded_solver.evaluate_and_update_policy()
        current = pyspiel.exploitability(game, loaded_solver.average_policy())
        print("Iteration {} exploitability: {:.6f}".format(
            half + step, current))
예제 #20
0
    def __init__(self, game_name, discount=1.0, seed=None, **kwargs):
        """Constructor.

        Args:
          game_name: string, Open Spiel game name.
          discount: float, discount used in non-initial steps. Defaults to 1.0.
          seed: int, random number generator seed. Defaults to None.
          **kwargs: dict, additional settings passed to the Open Spiel game.

        Raises:
          ValueError: if the game provides neither information state nor
            observation as a normalized vector.
        """
        self._rng = np.random.RandomState(seed)

        # Every keyword setting is wrapped in a GameParameter.
        game_settings = {}
        for setting_name, setting_value in kwargs.items():
            game_settings[setting_name] = pyspiel.GameParameter(setting_value)
        logging.info("Using game settings: %s", game_settings)

        self._game = pyspiel.load_game(game_name, game_settings)
        self._num_players = self._game.num_players()
        self._state = None
        self._should_reset = True

        # Discount returned at non-initial steps.
        self._discounts = [discount] * self._num_players

        # Decide whether to use observation or information_state; the
        # information state is preferred when the game provides it.
        game_type = self._game.get_type()
        if game_type.provides_information_state_as_normalized_vector:
            self._use_observation = False
        elif game_type.provides_observation_as_normalized_vector:
            self._use_observation = True
        else:
            raise ValueError("Game must provide either information state or "
                             "observation as a normalized vector")
예제 #21
0
def main(_):
    """Train one NeuRD model per player and print exploitability periodically.

    Args:
        _: Unused positional argument.
    """
    player_params = {"players": pyspiel.GameParameter(FLAGS.players)}
    game = pyspiel.load_game(FLAGS.game, player_params)

    # One model per player, all built with the same flag-controlled settings.
    models = [
        neurd.DeepNeurdModel(
            game,
            num_hidden_layers=FLAGS.num_hidden_layers,
            num_hidden_units=FLAGS.num_hidden_units,
            num_hidden_factors=FLAGS.num_hidden_factors,
            use_skip_connections=FLAGS.use_skip_connections,
            autoencode=FLAGS.autoencode) for _ in range(game.num_players())
    ]

    solver = neurd.CounterfactualNeurdSolver(game, models)

    def _train(model, data):
        # The Huber loss is only passed along when autoencoding is enabled.
        if FLAGS.autoencode:
            autoencoder_loss = tf.compat.v1.losses.huber_loss
        else:
            autoencoder_loss = None
        neurd.train(model,
                    data,
                    batch_size=FLAGS.batch_size,
                    step_size=FLAGS.step_size,
                    threshold=FLAGS.threshold,
                    autoencoder_loss=autoencoder_loss)

    for iteration in range(FLAGS.iterations):
        solver.evaluate_and_update_policy(_train)
        if iteration % FLAGS.print_freq != 0:
            continue
        conv = pyspiel.exploitability(game, solver.average_policy())
        print("Iteration {} exploitability {}".format(iteration, conv))
예제 #22
0
    def __init__(self,
                 game,
                 discount=1.0,
                 chance_event_sampler=None,
                 observation_type=None,
                 seed=None,
                 **kwargs):
        """Constructor.

        Args:
          game: [string, pyspiel.Game] Open Spiel game name or game instance.
          discount: float, discount used in non-initial steps. Defaults to 1.0.
          chance_event_sampler: optional object with `sample_external_events`
            method to sample chance events.
          observation_type: what kind of observation to use. If not specified,
            will default to INFORMATION_STATE unless the game doesn't provide
            it.
          seed: passed to the default ChanceEventSampler when no sampler is
            supplied.
          **kwargs: dict, additional settings passed to the Open Spiel game.
        """
        if chance_event_sampler:
            self._chance_event_sampler = chance_event_sampler
        else:
            self._chance_event_sampler = ChanceEventSampler(seed=seed)

        if isinstance(game, pyspiel.Game):
            # A ready-made game instance is used as-is; kwargs are ignored.
            logging.info("Using game instance: %s", game.get_type().short_name)
            self._game = game
        elif kwargs:
            game_settings = {}
            for setting_name, setting_value in kwargs.items():
                game_settings[setting_name] = pyspiel.GameParameter(
                    setting_value)
            logging.info("Using game settings: %s", game_settings)
            self._game = pyspiel.load_game(game, game_settings)
        else:
            logging.info("Using game string: %s", game)
            self._game = pyspiel.load_game(game)

        self._num_players = self._game.num_players()
        self._state = None
        self._should_reset = True

        # Discount returned at non-initial steps.
        self._discounts = [discount] * self._num_players

        # Determine what observation type to use.
        if observation_type is None:
            observation_type = (
                ObservationType.INFORMATION_STATE
                if self._game.get_type().provides_information_state_tensor
                else ObservationType.OBSERVATION)

        # The requested observation type is deliberately not validated against
        # the game type here. Per the original note, laser tag's state
        # provides an observation tensor while the turn-based simultaneous
        # game wrapper reports that it does not support it.
        self._use_observation = (
            observation_type == ObservationType.OBSERVATION)
예제 #23
0
def main(unused_argv):
    """Run GenPSRO with a 'nash' meta strategy and log the results.

    Args:
        unused_argv: Ignored.
    """
    player_params = {"players": pyspiel.GameParameter(FLAGS.num_players)}
    game = pyspiel.load_game(FLAGS.game, player_params)

    oracle = optimization_oracle.EvolutionaryStrategyOracle(
        n_evolution_tests=FLAGS.n_evolution_tests,
        number_policies_sampled=FLAGS.number_policies_sampled,
        number_episodes_sampled=FLAGS.number_episodes_sampled,
        alpha=FLAGS.alpha,
        beta=FLAGS.beta)
    g_psro_solver = generalized_psro.GenPSROSolver(
        game,
        oracle,
        sims_per_entry=FLAGS.sims_per_entry,
        meta_strategy_method='nash',
        rectify_training=FLAGS.rectify_training)

    for completed, _ in enumerate(range(FLAGS.gen_psro_iterations), start=1):
        g_psro_solver.iteration()
        nash_probabilities = g_psro_solver.get_and_update_meta_strategies()
        # Progress line: "<completed iterations> / <total iterations>".
        logging.info("%s / %s", completed, FLAGS.gen_psro_iterations)
        logging.info(nash_probabilities)

    # `get_meta_game` is read as an attribute here, not called.
    meta_game = g_psro_solver.get_meta_game
    meta_probabilities = g_psro_solver.get_and_update_meta_strategies()

    logging.info("%s meta probabilities", FLAGS.game)
    logging.info(meta_probabilities)
    logging.info("")

    logging.info("%s Meta Game Values", FLAGS.game)
    logging.info(meta_game)
    logging.info("")
예제 #24
0
def main(argv):
    """Seed the RNGs, build the environment and oracle, and run gpsro_looper.

    Args:
        argv: Command-line arguments; more than one element is rejected.

    Raises:
        app.UsageError: If `argv` has more than one element.
    """
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    # Draw a random seed when none was given, then seed every RNG with it.
    seed = (FLAGS.seed if FLAGS.seed is not None else np.random.randint(
        low=0, high=1e5))
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    player_params = {"players": pyspiel.GameParameter(FLAGS.n_players)}
    game = pyspiel.load_game_as_turn_based(FLAGS.game_name, player_params)
    env = rl_environment.Environment(game, seed=seed)
    env.reset()

    if not os.path.exists(FLAGS.root_result_folder):
        os.makedirs(FLAGS.root_result_folder)

    # The run directory name encodes the main settings, the seed and a
    # timestamp.
    run_name = "".join([
        'tuning_ars',
        str(FLAGS.iter_stop_dqn),
        '_',
        FLAGS.game_name,
        str(FLAGS.n_players),
        '_sims_',
        str(FLAGS.sims_per_entry),
        '_it_',
        str(FLAGS.gpsro_iterations),
        '_ep_',
        str(FLAGS.number_training_episodes),
        '_or_',
        FLAGS.oracle_type,
        '_arsnd_',
        str(FLAGS.num_directions),
        '_se_',
        str(seed),
        '_',
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'),
    ])
    checkpoint_dir = os.path.join(os.getcwd(), FLAGS.root_result_folder,
                                  run_name)

    writer = SummaryWriter(logdir=checkpoint_dir + '/log')
    if FLAGS.sbatch_run:
        # Redirect standard output into a file inside the run directory.
        sys.stdout = open(checkpoint_dir + '/stdout.txt', 'w+')

    # Initialize oracle and agents
    with tf.Session() as sess:
        oracle_type = FLAGS.oracle_type
        if oracle_type == "DQN":
            oracle, agents = init_dqn_responder(sess, env)
        elif oracle_type == "PG":
            oracle, agents = init_pg_responder(sess, env)
        elif oracle_type == "BR":
            oracle, agents = init_br_responder(env)
        elif oracle_type == "ARS":
            oracle, agents = init_ars_responder(sess, env)
        elif oracle_type == "ARS_parallel":
            oracle, agents = init_ars_parallel_responder(sess, env, None)
        sess.run(tf.global_variables_initializer())

        gpsro_looper(env,
                     oracle,
                     agents,
                     writer,
                     quiesce=FLAGS.quiesce,
                     checkpoint_dir=checkpoint_dir,
                     seed=seed,
                     dqn_iters=FLAGS.iter_stop_dqn)

    writer.close()
예제 #25
0
  def __init__(self,
               game,
               discount=1.0,
               chance_event_sampler=None,
               observation_type=None,
               include_full_state=False,
               **kwargs):
    """Constructor.

    Args:
      game: [string, pyspiel.Game] Open Spiel game name or game instance.
      discount: float, discount used in non-initial steps. Defaults to 1.0.
      chance_event_sampler: optional object with `sample_external_events` method
        to sample chance events.
      observation_type: what kind of observation to use. If not specified, will
        default to INFORMATION_STATE unless the game doesn't provide it.
      include_full_state: whether or not to include the full serialized
        OpenSpiel state in the observations (sometimes useful for debugging).
      **kwargs: dict, additional settings passed to the Open Spiel game. Only
        used when `game` is a string.

    Raises:
      ValueError: if the game does not provide the requested observation type.
    """
    self._chance_event_sampler = chance_event_sampler or ChanceEventSampler()
    self._include_full_state = include_full_state

    if isinstance(game, str):
      if kwargs:
        game_settings = {
            key: pyspiel.GameParameter(val) for (key, val) in kwargs.items()
        }
        logging.info("Using game settings: %s", game_settings)
        self._game = pyspiel.load_game(game, game_settings)
      else:
        logging.info("Using game string: %s", game)
        self._game = pyspiel.load_game(game)
    else:  # pyspiel.Game or API-compatible object.
      logging.info("Using game instance: %s", game.get_type().short_name)
      self._game = game

    self._num_players = self._game.num_players()
    self._state = None
    self._should_reset = True

    # Discount returned at non-initial steps.
    self._discounts = [discount] * self._num_players

    game_type = self._game.get_type()
    # `game` may be a game object rather than a name, and concatenating an
    # object to a string would raise TypeError instead of the intended
    # ValueError. Use the short name for instances.
    game_label = game if isinstance(game, str) else game_type.short_name

    # Determine what observation type to use.
    if observation_type is None:
      if game_type.provides_information_state_tensor:
        observation_type = ObservationType.INFORMATION_STATE
      else:
        observation_type = ObservationType.OBSERVATION

    # Check the requested observation type is supported.
    if observation_type == ObservationType.OBSERVATION:
      if not game_type.provides_observation_tensor:
        raise ValueError("observation_tensor not supported by " + game_label)
    elif observation_type == ObservationType.INFORMATION_STATE:
      if not game_type.provides_information_state_tensor:
        raise ValueError("information_state_tensor not supported by " +
                         game_label)
    self._use_observation = (observation_type == ObservationType.OBSERVATION)
예제 #26
0
 def initialize_board(self):
     """Create a new Go state on a size-9 board and seven 9x9 zero arrays."""
     # create go board
     go_game = pyspiel.load_game("go", {"board_size": pyspiel.GameParameter(9)})
     self.board_state = go_game.new_initial_state()
     # `game_states` starts out as seven all-zero 9x9 arrays.
     self.game_states = [np.zeros([9, 9]) for _ in range(7)]
예제 #27
0
 def __init__(self,params={"state":pyspiel.GameParameter('')}):
     """Initialise the game, optionally from a serialized parent state.

     Args:
         params: Mapping of game parameters. When it has a "state" entry, that
             parameter's string value is stored as ``_parent_game_state``; an
             empty string selects ``_DEFAULT_STATE``. ``None`` is treated as an
             empty mapping.
     """
     # Normalise a falsy `params` (None or empty) once, so both the parent
     # constructor and the lookup below see a real dict.
     params = params or dict()
     # NOTE(review): `self` is passed explicitly to the already-bound
     # super().__init__ call, so the parent receives it twice -- confirm that
     # this is what the base class expects.
     super().__init__(self, _GAME_TYPE, _GAME_INFO, params)
     # Per the original note, the state is a JSON string with keys
     # players, rewards, board, grandes, king.
     state_param = params.get("state", None)
     if state_param is not None:
         state = state_param.string_value()
         if state == '':
             state = _DEFAULT_STATE
         self._parent_game_state = state
예제 #28
0
 def test_solve_small_pig(self):
     """Run value iteration on Pig with winscore=20 and print the root value."""
     pig_params = {"winscore": pyspiel.GameParameter(20)}
     game = pyspiel.load_game("pig", pig_params)
     solver_options = dict(depth_limit=-1, threshold=1e-6, cyclic_game=True)
     values = value_iteration.value_iteration(game, **solver_options)
     # Values are keyed by the string form of a state.
     root_key = str(game.new_initial_state())
     print("Value of Pig(20): ", values[root_key])
예제 #29
0
def main(_):
    """Train one NeuRD model per player and periodically report exploitability.

    Seeds TensorFlow from ``FLAGS.random_seed``, loads ``FLAGS.game`` with
    ``FLAGS.players`` players, then runs ``FLAGS.iterations`` solver updates.
    Every ``FLAGS.print_freq`` iterations the exploitability of the average
    policy is computed and printed.
    """
    tensorflow.random.set_random_seed(int(FLAGS.random_seed))
    game_params = {"players": pyspiel.GameParameter(FLAGS.players)}
    game = pyspiel.load_game(FLAGS.game, game_params)

    # One independent model per player.
    models = [
        neurd.DeepNeurdModel(
            game,
            num_hidden_layers=FLAGS.num_hidden_layers,
            num_hidden_units=FLAGS.num_hidden_units,
            num_hidden_factors=FLAGS.num_hidden_factors,
            use_skip_connections=FLAGS.use_skip_connections,
            autoencode=FLAGS.autoencode)
        for _ in range(game.num_players())
    ]

    solver = neurd.CounterfactualNeurdSolver(game, FLAGS.alpha, models)

    def _train(model, data):
        """Training callback handed to the solver for each model update."""
        if FLAGS.autoencode:
            ae_loss = tf.compat.v1.losses.huber_loss
        else:
            ae_loss = None
        neurd.train(model,
                    data,
                    batch_size=FLAGS.batch_size,
                    step_size=FLAGS.step_size,
                    alpha=FLAGS.alpha,
                    threshold=FLAGS.threshold,
                    autoencoder_loss=ae_loss)

    # Placeholder passed as `conv` until the first exploitability measurement.
    conv = 100

    for i in range(FLAGS.iterations):
        if FLAGS.adaptive_alpha:
            # The adaptive variant also receives the iteration index and the
            # most recent exploitability so it can adjust alpha.
            update_kwargs = dict(
                current_iteration=i,
                alpha=FLAGS.alpha,
                increase=FLAGS.increase,
                gamma=FLAGS.gamma,
                adaptive_policy=FLAGS.adaptive_policy,
                total_iteration=FLAGS.iterations,
                semi_percent=FLAGS.semi_percent,
                exploit_rate=FLAGS.exploit_rate,
                conv=conv,
                exp_exploit_rate=FLAGS.exp_exploit_rate)
        else:
            update_kwargs = dict(alpha=FLAGS.alpha)
        solver.evaluate_and_update_policy(_train, **update_kwargs)

        if i % FLAGS.print_freq != 0:
            continue
        conv = pyspiel.exploitability(game, solver.average_policy())
        print("Iteration {} exploitability {}".format(i, conv))
예제 #30
0
def main(_):
  """Run CFR on ``FLAGS.game`` and periodically print exploitability.

  Performs ``FLAGS.iterations`` solver updates; every ``FLAGS.print_freq``
  iterations the exploitability of the average policy is printed.
  """
  player_param = {"players": pyspiel.GameParameter(FLAGS.players)}
  game = pyspiel.load_game(FLAGS.game, player_param)
  solver = cfr.CFRSolver(game)

  for step in range(FLAGS.iterations):
    solver.evaluate_and_update_policy()
    if step % FLAGS.print_freq != 0:
      continue
    conv = exploitability.exploitability(game, solver.average_policy())
    print("Iteration {} exploitability {}".format(step, conv))