def nfsp_measure_exploitability_nonlstm(rllib_policies: List[Policy],
                                        poker_game_version: str,
                                        open_spiel_env_config: dict = None):
    """Measure the exploitability of a joint policy built from RLlib policies.

    Args:
        rllib_policies: RLlib policies; each one is converted to an OpenSpiel
            policy and the resulting list is passed to JointPlayerPolicy in
            the same order.
        poker_game_version: OpenSpiel game name to load.
        open_spiel_env_config: optional game parameters. Plain values are
            wrapped in ``pyspiel.GameParameter``. When None, kuhn_poker and
            leduc_poker get ``players=2`` and every other game gets no
            parameters.

    Returns:
        The value returned by ``exploitability`` for the joint policy.
    """
    if open_spiel_env_config is None:
        needs_player_count = poker_game_version in ("kuhn_poker", "leduc_poker")
        open_spiel_env_config = (
            {"players": pyspiel.GameParameter(2)} if needs_player_count else {})

    # Wrap any raw values so the whole config consists of GameParameter objects.
    wrapped_config = {}
    for key, value in open_spiel_env_config.items():
        if isinstance(value, pyspiel.GameParameter):
            wrapped_config[key] = value
        else:
            wrapped_config[key] = pyspiel.GameParameter(value)

    openspiel_game = pyspiel.load_game(poker_game_version, wrapped_config)
    if poker_game_version == "oshi_zumo":
        openspiel_game = pyspiel.convert_to_turn_based(openspiel_game)

    opnsl_policies = [
        openspiel_policy_from_nonlstm_rllib_policy(
            openspiel_game=openspiel_game,
            rllib_policy=rllib_policy,
            game_version=poker_game_version,
            game_parameters=wrapped_config,
        )
        for rllib_policy in rllib_policies
    ]
    nfsp_policy = JointPlayerPolicy(game=openspiel_game, policies=opnsl_policies)

    # Exploitability is NashConv / num_players
    if poker_game_version == "universal_poker":
        print("Measuring exploitability for universal_poker policy. This will take a while...")
    return exploitability(game=openspiel_game, policy=nfsp_policy)
def main(argv):
    """Play FLAGS.num_deals uncontested-bidding deals with two bots and print score statistics."""
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    game_params = {
        "relative_scoring": pyspiel.GameParameter(True),
        "rng_seed": pyspiel.GameParameter(FLAGS.rng_seed),
    }
    game = pyspiel.load_game("bridge_uncontested_bidding", game_params)

    # One bot per seat (0 and 1), each backed by its own client process.
    bots = [
        bluechip_bridge_uncontested_bidding.BlueChipBridgeBot(
            game, seat, _WBridge5Client(FLAGS.bot_cmd))
        for seat in (0, 1)
    ]

    results = []
    for i_deal in range(FLAGS.num_deals):
        final_state = _run_once(game.new_initial_state(), bots)
        print("Deal #{}; final state:\n{}".format(i_deal, final_state))
        results.append(final_state.returns())

    # Column-wise mean and standard error over all deals.
    stats = np.array(results)
    mean = stats.mean(axis=0)
    stderr = stats.std(axis=0, ddof=1) / np.sqrt(FLAGS.num_deals)
    print(u"Absolute score: {:+.1f}\u00b1{:.1f}".format(mean[0], stderr[0]))
    print(u"Relative score: {:+.1f}\u00b1{:.1f}".format(mean[1], stderr[1]))
def test_solve_small_oshi_zumo(self):
    """Value iteration yields a root value of 0 for two small Oshi-Zumo games."""
    # (label, game parameters, cyclic_game flag given to value iteration)
    scenarios = [
        ("Oshi-Zumo(5, 2, 0)",
         {"coins": pyspiel.GameParameter(5),
          "size": pyspiel.GameParameter(2)},
         True),
        ("Oshi-Zumo(5, 2, 1)",
         {"coins": pyspiel.GameParameter(5),
          "size": pyspiel.GameParameter(2),
          "min_bid": pyspiel.GameParameter(1)},
         False),
    ]
    for _label, game_params, is_cyclic in scenarios:
        game = pyspiel.load_game("oshi_zumo", game_params)
        values = value_iteration.value_iteration(
            game, depth_limit=-1, threshold=1e-6, cyclic_game=is_cyclic)
        root = game.new_initial_state()
        # Symmetric game: value is 0
        self.assertAlmostEqual(values[str(root)], 0)
def test_game_parameters_from_string_with_options(self):
    """A game string with options parses into a name plus typed parameters."""
    expected = {
        "name": pyspiel.GameParameter("foo"),
        "x": pyspiel.GameParameter(2),
        "y": pyspiel.GameParameter(True),
    }
    parsed = pyspiel.game_parameters_from_string("foo(x=2,y=true)")
    self.assertEqual(parsed, expected)
def main(_):
    """Load the game with a weight matrix and play it out with random actions.

    When FLAGS.num_nodes is set, random 2-D points are drawn and their
    pairwise distances (rounded to two decimals) become the edge weights.
    Otherwise a 5-node game with all-zero weights is used. The playout prints
    every state and finally each player's utility.
    """
    print("Creating game: " + FLAGS.game)
    if FLAGS.num_nodes is not None:
        num_nodes = FLAGS.num_nodes
        node_coords = np.random.random((num_nodes, 2))
        dist_mat = np.round(distance_matrix(node_coords, node_coords), 2).flatten()
        weights = ",".join(str(weight) for weight in dist_mat)
    else:
        # Default game. num_nodes and dist_mat are defined here as well so the
        # playout loop below does not fail on the unset flag / missing matrix.
        num_nodes = 5
        dist_mat = np.zeros(num_nodes * num_nodes)
        weights = "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0"
    game = pyspiel.load_game(
        FLAGS.game,
        {"num_nodes": pyspiel.GameParameter(num_nodes),
         "weights": pyspiel.GameParameter(weights)})

    # Get a new state
    if FLAGS.load_state is not None:
        # Load a specific state
        with open(FLAGS.load_state, encoding="utf-8") as input_file:
            state_string = input_file.read().rstrip()
        print("Loading state:")
        print(state_string)
        print("")
        state = game.deserialize_state(state_string)
    else:
        state = game.new_initial_state()

    # Print the initial state
    print(str(state))

    while not state.is_terminal():
        legal_actions = state.legal_actions(state.current_player())
        # Each action id is shown as a (row, column) pair of the weight matrix.
        print("Legal Actions: ",
              [(i // num_nodes, i % num_nodes) for i in legal_actions])
        # Decision node: sample action for the single current player
        action = random.choice(legal_actions)
        action_string = state.action_to_string(state.current_player(), action)
        print("Player ", state.current_player(),
              ", randomly sampled action: ", action_string)
        state.apply_action(action)
        print(str(state))
        print("Information State: ", state.information_state_string())
        print("Edge Values: ", dist_mat)

    # Game is now done. Print utilities for each player
    returns = state.returns()
    for pid in range(game.num_players()):
        print("Utility for player {} is {}".format(pid, returns[pid]))
def test_iigoofspiel4(self):
    """The sequence-form LP gives both players value 0 in 4-card imperfect-info goofspiel."""
    game_params = {
        "imp_info": pyspiel.GameParameter(True),
        "num_cards": pyspiel.GameParameter(4),
        "points_order": pyspiel.GameParameter("descending"),
    }
    game = pyspiel.load_game_as_turn_based("goofspiel", game_params)
    first_value, second_value, _, _ = sequence_form_lp.solve_zero_sum_game(game)
    # symmetric game, should be 0
    for game_value in (first_value, second_value):
        self.assertAlmostEqual(game_value, 0)
def __init__(self, scale=1, shift=0):
    """Create a fully observable, 100-step coop_box_pushing_serial game wrapper.

    Args:
        scale: stored on the instance as ``self.scale``.
        shift: stored on the instance as ``self.shift``.
    """
    self.scale, self.shift = scale, shift
    game_params = {
        "fully_observable": pyspiel.GameParameter(True),
        "horizon": pyspiel.GameParameter(100),
    }
    self.game = pyspiel.load_game("coop_box_pushing_serial", game_params)
    self.state = self.game.new_initial_state()
    # The episode step limit is taken from the game's maximum length.
    self._max_episode_steps = self.game.max_game_length()
    self.env_seed = 0
def nxdo_nfsp_measure_exploitability_nonlstm(
        rllib_policies: List[Policy],
        use_delegate_policy_exploration: bool,
        restricted_game_convertors: Union[
            List[RestrictedToBaseGameActionSpaceConverter],
            List[AgentRestrictedGameOpenSpielObsConversions]],
        poker_game_version: str,
        open_spiel_env_config: dict = None):
    """Measure the exploitability of NXDO restricted-game RLlib policies.

    Args:
        rllib_policies: RLlib policies, paired positionally with
            ``restricted_game_convertors``.
        use_delegate_policy_exploration: forwarded to the policy conversion.
        restricted_game_convertors: list of converters, one per policy.
        poker_game_version: OpenSpiel game name. "oshi_zumo_tiny" is mapped to
            "oshi_zumo" with preset parameters when no config is given.
        open_spiel_env_config: optional game parameters. Plain values are
            wrapped in ``pyspiel.GameParameter``.

    Returns:
        The value returned by ``exploitability`` for the joint policy.
    """
    if open_spiel_env_config is None:
        if poker_game_version in ("kuhn_poker", "leduc_poker"):
            open_spiel_env_config = {"players": pyspiel.GameParameter(2)}
        elif poker_game_version == "oshi_zumo_tiny":
            # Alias for a small oshi_zumo configuration.
            poker_game_version = "oshi_zumo"
            open_spiel_env_config = {
                "coins": pyspiel.GameParameter(6),
                "size": pyspiel.GameParameter(2),
                "horizon": pyspiel.GameParameter(8),
            }
        else:
            open_spiel_env_config = {}

    # Wrap any raw values so the whole config consists of GameParameter objects.
    wrapped_config = {}
    for key, value in open_spiel_env_config.items():
        if isinstance(value, pyspiel.GameParameter):
            wrapped_config[key] = value
        else:
            wrapped_config[key] = pyspiel.GameParameter(value)

    openspiel_game = pyspiel.load_game(poker_game_version, wrapped_config)

    assert isinstance(restricted_game_convertors, list)
    opnsl_policies = [
        openspiel_policy_from_nonlstm_rllib_nxdo_policy(
            openspiel_game=openspiel_game,
            rllib_policy=rllib_policy,
            restricted_game_convertor=convertor,
            use_delegate_policy_exploration=use_delegate_policy_exploration)
        for convertor, rllib_policy in zip(restricted_game_convertors,
                                           rllib_policies)
    ]
    nfsp_policy = JointPlayerPolicy(game=openspiel_game, policies=opnsl_policies)

    # Exploitability is NashConv / num_players
    if poker_game_version == "universal_poker":
        print(
            "Measuring exploitability for universal_poker policy. This will take a while..."
        )
    return exploitability(game=openspiel_game, policy=nfsp_policy)
def test_passing_params(self):
    """Bots loaded with an "actions" parameter play tic-tac-toe to the expected result."""
    game = pyspiel.load_game("tic_tac_toe")
    # Action preference string for player 0 and player 1 respectively.
    preferences = ("0:1:2", "3:4")
    bots = [
        pyspiel.load_bot(
            "fixed_action_preference",
            game,
            player=player_id,
            params={"actions": pyspiel.GameParameter(preference)})
        for player_id, preference in enumerate(preferences)
    ]
    result = pyspiel.evaluate_bots(game.new_initial_state(), bots, seed=0)
    self.assertEqual(result, [1, -1])  # Player 0 wins.
def cfr_train(unused_arg):
    """Run CFR for FLAGS.episodes iterations and persist the results.

    Every 100 iterations the exploitability of the average policy is measured,
    recorded and printed. Afterwards the (iteration, exploitability) history is
    pickled to ``<game>_cfr_<episodes>.dat`` and the average policy is written
    out through ``policy_to_csv``.

    Args:
        unused_arg: ignored.
    """
    exploit_history = []
    exploit_idx = []
    tf.enable_eager_execution()
    game = pyspiel.load_game(FLAGS.game, {"players": pyspiel.GameParameter(2)})
    agent_name = "cfr"
    cfr_solver = cfr.CFRSolver(game)
    checkpoint = datetime.now()
    for ep in range(FLAGS.episodes):
        cfr_solver.evaluate_and_update_policy()
        if ep % 100 == 0:
            delta = datetime.now() - checkpoint
            conv = exploitability.exploitability(game,
                                                 cfr_solver.average_policy())
            exploit_idx.append(ep)
            exploit_history.append(conv)
            print(
                "Iteration {} exploitability {} - {} seconds since last checkpoint"
                .format(ep, conv, delta.seconds))
            checkpoint = datetime.now()

    # Use a context manager so the history file is flushed and closed.
    history_path = (FLAGS.game + "_" + agent_name + "_" + str(FLAGS.episodes)
                    + ".dat")
    with open(history_path, "wb") as history_file:
        pickle.dump([exploit_idx, exploit_history], history_file)

    now = datetime.now()
    policy = cfr_solver.average_policy()
    # NOTE(review): the file names carry pid + 1 (i.e. 2 and 3) and the index
    # of the last loop iteration; `ep` is unbound if FLAGS.episodes is 0.
    # Kept as-is -- confirm the intended naming.
    for pid in [1, 2]:
        policy_to_csv(
            game, policy,
            "policies/policy_" + now.strftime("%m-%d-%Y_%H-%M") + "_"
            + agent_name + "_" + str(pid + 1) + "_+" + str(ep)
            + "episodes.csv")
def nfsp_measure_exploitability_nonlstm(rllib_p0_and_p1_policies,
                                        poker_game_version):
    """Measure the exploitability of the given policies as an NFSP joint policy.

    Args:
        rllib_p0_and_p1_policies: iterable of policies. Entries that are
            already ``OSPolicy`` instances are used directly; all others are
            converted with ``openspiel_policy_from_nonlstm_rllib_policy``.
        poker_game_version: OpenSpiel game name. KUHN_POKER and LEDUC_POKER
            are loaded with ``players=2``, other games without parameters.

    Returns:
        The value returned by ``exploitability`` for the joint policy.
    """
    needs_player_count = poker_game_version in [KUHN_POKER, LEDUC_POKER]
    open_spiel_env_config = (
        {"players": pyspiel.GameParameter(2)} if needs_player_count else {})

    openspiel_game = pyspiel.load_game(poker_game_version, open_spiel_env_config)
    openspiel_env = Environment(poker_game_version, open_spiel_env_config)

    openspiel_policies = [
        candidate if isinstance(candidate, OSPolicy)
        else openspiel_policy_from_nonlstm_rllib_policy(
            openspiel_game=openspiel_game,
            poker_game_version=poker_game_version,
            rllib_policy=candidate)
        for candidate in rllib_p0_and_p1_policies
    ]
    nfsp_os_policy = NFSPPolicies(env=openspiel_env,
                                  nfsp_policies=openspiel_policies)

    # Exploitability is NashConv / num_players
    return exploitability(game=openspiel_game, policy=nfsp_os_policy)
def main(_):
    """Run an MCCFR solver, round-tripping it through pickle half way.

    The solver selected by FLAGS.sampling runs for half of FLAGS.iterations,
    is pickled to MODEL_FILE_NAME and loaded back (printing the loaded copy's
    exploitability), and then the original solver runs the second half.

    Raises:
        ValueError: if FLAGS.sampling is neither "external" nor "outcome".
    """
    game = pyspiel.load_game(
        FLAGS.game,
        {"players": pyspiel.GameParameter(FLAGS.players)},
    )

    if FLAGS.sampling == "external":
        solver = pyspiel.ExternalSamplingMCCFRSolver(
            game,
            avg_type=pyspiel.MCCFRAverageType.FULL,
        )
    elif FLAGS.sampling == "outcome":
        solver = pyspiel.OutcomeSamplingMCCFRSolver(game)
    else:
        # Fail with a clear message instead of an unbound `solver` later on.
        raise ValueError(
            "Unsupported sampling method: {!r}".format(FLAGS.sampling))

    half_iterations = int(FLAGS.iterations / 2)
    model_path = MODEL_FILE_NAME.format(FLAGS.sampling)

    for i in range(half_iterations):
        solver.run_iteration()
        print("Iteration {} exploitability: {:.6f}".format(
            i, pyspiel.exploitability(game, solver.average_policy())))

    print("Persisting the model...")
    with open(model_path, "wb") as file:
        pickle.dump(solver, file, pickle.HIGHEST_PROTOCOL)

    print("Loading the model...")
    with open(model_path, "rb") as file:
        loaded_solver = pickle.load(file)
    print("Exploitability of the loaded model: {:.6f}".format(
        pyspiel.exploitability(game, loaded_solver.average_policy())))

    # The second half continues with the original (not the reloaded) solver.
    for i in range(half_iterations):
        solver.run_iteration()
        print("Iteration {} exploitability: {:.6f}".format(
            half_iterations + i,
            pyspiel.exploitability(game, solver.average_policy())))
def test_create_repeated_game(self):
    """Test both create_repeated_game function signatures."""

    def _play_until_done(repeated_game, repetitions):
        # Apply the joint action [0, 0] once per repetition; the game must
        # then be over.
        state = repeated_game.new_initial_state()
        for _ in range(repetitions):
            state.apply_actions([0, 0])
        assert state.is_terminal()

    # Signature 1: stage game given by name.
    from_name = pyspiel.create_repeated_game(
        "matrix_rps", {"num_repetitions": pyspiel.GameParameter(10)})
    _play_until_done(from_name, 10)

    # Signature 2: stage game given as a loaded game object.
    stage_game = pyspiel.load_game("matrix_mp")
    from_instance = pyspiel.create_repeated_game(
        stage_game, {"num_repetitions": pyspiel.GameParameter(5)})
    _play_until_done(from_instance, 5)
def test_gpsro(self, game_name, rnr_iterations, sims_per_entry, number_players,
               rectify_training, training_strategy_selector,
               meta_strategy_method):
    """Run a few GenPSRO iterations and log the resulting meta strategies and meta game."""
    player_params = {"players": pyspiel.GameParameter(number_players)}
    game = pyspiel.load_game(game_name, player_params)
    oracle = optimization_oracle.EvolutionaryStrategyOracle(
        number_policies_sampled=2, number_episodes_sampled=2)
    solver = generalized_psro.GenPSROSolver(
        game,
        oracle,
        sims_per_entry=sims_per_entry,
        rectify_training=rectify_training,
        training_strategy_selector=training_strategy_selector,
        meta_strategy_method=meta_strategy_method)

    for _ in range(rnr_iterations):
        solver.iteration()

    # NOTE(review): get_meta_game is read as an attribute, not called --
    # presumably a property on the solver; confirm.
    meta_game = solver.get_meta_game
    meta_probabilities = solver.get_and_update_meta_strategies()

    logging.info("%s %sP - %s", game_name, str(number_players),
                 meta_strategy_method)
    logging.info("Meta Strategies")
    logging.info(meta_probabilities)
    logging.info("")
    logging.info("Meta Game Values")
    logging.info(meta_game)
    logging.info("")
def main(_):
    """List the registered games, load FLAGS.game and print every state found."""
    print("Registered games:")
    print(pyspiel.registered_games())

    print("Creating game: " + FLAGS.game)
    if FLAGS.players is None:
        # Otherwise can create directly.
        game = pyspiel.load_game(FLAGS.game)
    else:
        # If passing parameters, must use game creator.
        game = pyspiel.load_game(
            FLAGS.game, {"players": pyspiel.GameParameter(FLAGS.players)})

    print("Getting all states; depth_limit = {}".format(FLAGS.depth_limit))
    all_states = get_all_states.get_all_states(game, FLAGS.depth_limit,
                                               FLAGS.include_terminals,
                                               FLAGS.include_chance_states)

    count = 0
    for count, state in enumerate(all_states, start=1):
        print("")
        print(str(state))

    print("")
    print("Total: {} states.".format(count))
def main(_):
    """Print the short names of registered games, then every state of FLAGS.game."""
    print("Registered games:")
    for registered in pyspiel.registered_games():
        print(" ", registered.short_name)
    print()

    print("Creating game:", FLAGS.game)
    # The player count is only passed when the flag was given.
    params = (
        {} if FLAGS.players is None
        else {"players": pyspiel.GameParameter(FLAGS.players)})
    game = pyspiel.load_game(FLAGS.game, params)

    print("Getting all states; depth_limit = {}".format(FLAGS.depth_limit))
    all_states = get_all_states.get_all_states(game, FLAGS.depth_limit,
                                               FLAGS.include_terminals,
                                               FLAGS.include_chance_states)

    count = 0
    for count, state in enumerate(all_states, start=1):
        print(state)

    print()
    print("Total: {} states.".format(count))
def __init__(self, env_name, env_seed=2, deltas=None, slow_oracle_kargs=None,
             fast_oracle_kargs=None):
    """Set up a two-player turn-based environment, noise table and TF session.

    Args:
        env_name: OpenSpiel game name, loaded as a turn-based game.
        env_seed: base seed; the shared noise table is created with
            ``env_seed + 7``.
        deltas: passed to ``SharedNoiseTable``.
        slow_oracle_kargs: optional dict; when given, the TF session is stored
            in it under the key 'session' (the dict is modified in place).
        fast_oracle_kargs: dict that must contain the key 'noise'.
    """
    # initialize rl environment.
    from open_spiel.python import rl_environment
    import pyspiel

    self._num_players = 2
    player_params = {"players": pyspiel.GameParameter(self._num_players)}
    turn_based_game = pyspiel.load_game_as_turn_based(env_name, player_params)
    self._env = rl_environment.Environment(turn_based_game)

    # Each worker gets access to the shared noise table
    # with independent random streams for sampling
    # from the shared noise table.
    self.deltas = SharedNoiseTable(deltas, env_seed + 7)

    self._policies = [[] for _ in range(self._num_players)]
    self._slow_oracle_kargs = slow_oracle_kargs
    self._fast_oracle_kargs = fast_oracle_kargs
    # NOTE(review): fails with the default fast_oracle_kargs=None -- callers
    # apparently always pass a dict; confirm.
    self._delta_std = self._fast_oracle_kargs['noise']

    # Reuse the default TF session when there is one, otherwise create one.
    session = tf.get_default_session()
    if session is None:
        session = tf.Session()
    self._sess = session

    if self._slow_oracle_kargs is not None:
        self._slow_oracle_kargs['session'] = self._sess
def xfsp_train(_):
    """Run extensive-form fictitious play for FLAGS.episodes iterations and persist the results.

    Every 1000 iterations the exploitability of the average policy is
    measured, recorded and printed. Afterwards the (iteration, exploitability)
    history is pickled to ``<game>_xfsp_<episodes>.dat`` and the average policy
    is written out through ``policy_to_csv``.
    """
    exploit_history = []
    exploit_idx = []
    game = pyspiel.load_game(FLAGS.game, {"players": pyspiel.GameParameter(2)})
    fsp_solver = fictitious_play.XFPSolver(game)
    checkpoint = datetime.now()
    for ep in range(FLAGS.episodes):
        if (ep % 1000) == 0:
            delta = datetime.now() - checkpoint
            pol = policy.PolicyFromCallable(
                game, fsp_solver.average_policy_callable())
            conv = exploitability.exploitability(game, pol)
            exploit_history.append(conv)
            exploit_idx.append(ep)
            print(
                "[XFSP] Iteration {} exploitability {} - {} seconds since last checkpoint"
                .format(ep, conv, delta.seconds))
            checkpoint = datetime.now()
        fsp_solver.iteration()

    agent_name = "xfsp"
    # Use a context manager so the history file is flushed and closed.
    history_path = (FLAGS.game + "_" + agent_name + "_" + str(FLAGS.episodes)
                    + ".dat")
    with open(history_path, "wb") as history_file:
        pickle.dump([exploit_idx, exploit_history], history_file)

    pol = policy.PolicyFromCallable(game, fsp_solver.average_policy_callable())
    # The timestamp for the output file names was never defined in this
    # function; take it here.
    now = datetime.now()
    # NOTE(review): the file names carry pid + 1 (i.e. 2 and 3); kept as-is --
    # confirm the intended naming.
    for pid in [1, 2]:
        policy_to_csv(
            game, pol,
            "policies/policy_" + now.strftime("%m-%d-%Y_%H-%M") + "_"
            + agent_name + "_" + str(pid + 1) + "_+" + str(FLAGS.episodes)
            + "episodes.csv")
def main(_):
    """Run a CFR-family solver, round-tripping it through pickle half way.

    The solver selected by FLAGS.solver runs for half of FLAGS.iterations, is
    pickled to ``<solver>_solver.pickle`` and loaded back, and the loaded copy
    runs the second half.

    Raises:
        ValueError: if FLAGS.solver is not "cfr", "cfrplus" or "cfrbr".
    """
    game = pyspiel.load_game(
        FLAGS.game,
        {"players": pyspiel.GameParameter(FLAGS.players)},
    )

    if FLAGS.solver == "cfr":
        solver = pyspiel.CFRSolver(game)
    elif FLAGS.solver == "cfrplus":
        solver = pyspiel.CFRPlusSolver(game)
    elif FLAGS.solver == "cfrbr":
        solver = pyspiel.CFRBRSolver(game)
    else:
        # Fail with a clear message instead of an unbound `solver` later on.
        raise ValueError("Unsupported solver: {!r}".format(FLAGS.solver))

    half_iterations = int(FLAGS.iterations / 2)
    pickle_path = "{}_solver.pickle".format(FLAGS.solver)

    for i in range(half_iterations):
        solver.evaluate_and_update_policy()
        print("Iteration {} exploitability: {:.6f}".format(
            i, pyspiel.exploitability(game, solver.average_policy())))

    print("Persisting the model...")
    with open(pickle_path, "wb") as file:
        pickle.dump(solver, file, pickle.HIGHEST_PROTOCOL)

    print("Loading the model...")
    with open(pickle_path, "rb") as file:
        loaded_solver = pickle.load(file)
    print("Exploitability of the loaded model: {:.6f}".format(
        pyspiel.exploitability(game, loaded_solver.average_policy())))

    # The second half continues with the solver restored from disk.
    for i in range(half_iterations):
        loaded_solver.evaluate_and_update_policy()
        print("Iteration {} exploitability: {:.6f}".format(
            half_iterations + i,
            pyspiel.exploitability(game, loaded_solver.average_policy())))
def __init__(self, game_name, discount=1.0, seed=None, **kwargs):
    """Constructor.

    Args:
      game_name: string, Open Spiel game name.
      discount: float, discount used in non-initial steps. Defaults to 1.0.
      seed: int, seed for this environment's numpy RandomState. Defaults to
        None.
      **kwargs: dict, additional settings passed to the Open Spiel game; each
        value is wrapped in a pyspiel.GameParameter.

    Raises:
      ValueError: if the game provides neither the information state nor the
        observation as a normalized vector.
    """
    self._rng = np.random.RandomState(seed)

    game_settings = {}
    for setting_name, setting_value in kwargs.items():
        game_settings[setting_name] = pyspiel.GameParameter(setting_value)
    logging.info("Using game settings: %s", game_settings)
    self._game = pyspiel.load_game(game_name, game_settings)

    self._num_players = self._game.num_players()
    self._state = None
    self._should_reset = True

    # Discount returned at non-initial steps.
    self._discounts = [discount] * self._num_players

    # Decide whether to use observation or information_state; the
    # information state wins when both are available.
    game_type = self._game.get_type()
    if game_type.provides_information_state_as_normalized_vector:
        self._use_observation = False
    elif game_type.provides_observation_as_normalized_vector:
        self._use_observation = True
    else:
        raise ValueError("Game must provide either information state or "
                         "observation as a normalized vector")
def main(_):
    """Train one NeuRD model per player and periodically print exploitability."""
    player_params = {"players": pyspiel.GameParameter(FLAGS.players)}
    game = pyspiel.load_game(FLAGS.game, player_params)

    # One network per player, all built from the same flag settings.
    models = [
        neurd.DeepNeurdModel(
            game,
            num_hidden_layers=FLAGS.num_hidden_layers,
            num_hidden_units=FLAGS.num_hidden_units,
            num_hidden_factors=FLAGS.num_hidden_factors,
            use_skip_connections=FLAGS.use_skip_connections,
            autoencode=FLAGS.autoencode)
        for _ in range(game.num_players())
    ]
    solver = neurd.CounterfactualNeurdSolver(game, models)

    def _train(model, data):
        # The Huber autoencoder loss is only used when autoencoding is on.
        if FLAGS.autoencode:
            autoencoder_loss = tf.compat.v1.losses.huber_loss
        else:
            autoencoder_loss = None
        neurd.train(model,
                    data,
                    batch_size=FLAGS.batch_size,
                    step_size=FLAGS.step_size,
                    threshold=FLAGS.threshold,
                    autoencoder_loss=autoencoder_loss)

    for i in range(FLAGS.iterations):
        solver.evaluate_and_update_policy(_train)
        if i % FLAGS.print_freq != 0:
            continue
        conv = pyspiel.exploitability(game, solver.average_policy())
        print("Iteration {} exploitability {}".format(i, conv))
def __init__(self, game, discount=1.0, chance_event_sampler=None,
             observation_type=None, seed=None, **kwargs):
    """Constructor.

    Args:
      game: [string, pyspiel.Game] Open Spiel game name or game instance.
      discount: float, discount used in non-initial steps. Defaults to 1.0.
      chance_event_sampler: optional object with `sample_external_events`
        method to sample chance events. When not given, a ChanceEventSampler
        seeded with `seed` is created.
      observation_type: what kind of observation to use. If not specified,
        will default to INFORMATION_STATE unless the game doesn't provide it.
      seed: seed handed to the default ChanceEventSampler.
      **kwargs: dict, additional settings passed to the Open Spiel game. They
        are ignored when `game` is already a game instance.
    """
    if not chance_event_sampler:
        chance_event_sampler = ChanceEventSampler(seed=seed)
    self._chance_event_sampler = chance_event_sampler

    if isinstance(game, pyspiel.Game):
        logging.info("Using game instance: %s", game.get_type().short_name)
        self._game = game
    elif kwargs:
        game_settings = {}
        for setting_name, setting_value in kwargs.items():
            game_settings[setting_name] = pyspiel.GameParameter(setting_value)
        logging.info("Using game settings: %s", game_settings)
        self._game = pyspiel.load_game(game, game_settings)
    else:
        logging.info("Using game string: %s", game)
        self._game = pyspiel.load_game(game)

    self._num_players = self._game.num_players()
    self._state = None
    self._should_reset = True

    # Discount returned at non-initial steps.
    self._discounts = [discount] * self._num_players

    # Determine what observation type to use.
    if observation_type is None:
        if self._game.get_type().provides_information_state_tensor:
            observation_type = ObservationType.INFORMATION_STATE
        else:
            observation_type = ObservationType.OBSERVATION

    # The check that the game type supports the requested observation type is
    # intentionally not performed here: laser tag's state provides
    # observation_tensor, yet the turn_based_simultaneous game says it does
    # not support observation_tensor.
    self._use_observation = observation_type == ObservationType.OBSERVATION
def main(unused_argv):
    """Run GenPSRO with an evolutionary-strategy oracle and log the meta game."""
    player_params = {"players": pyspiel.GameParameter(FLAGS.num_players)}
    game = pyspiel.load_game(FLAGS.game, player_params)

    oracle = optimization_oracle.EvolutionaryStrategyOracle(
        n_evolution_tests=FLAGS.n_evolution_tests,
        number_policies_sampled=FLAGS.number_policies_sampled,
        number_episodes_sampled=FLAGS.number_episodes_sampled,
        alpha=FLAGS.alpha,
        beta=FLAGS.beta)
    solver = generalized_psro.GenPSROSolver(
        game,
        oracle,
        sims_per_entry=FLAGS.sims_per_entry,
        meta_strategy_method='nash',
        rectify_training=FLAGS.rectify_training)

    total_iterations = FLAGS.gen_psro_iterations
    for completed in range(1, total_iterations + 1):
        solver.iteration()
        nash_probabilities = solver.get_and_update_meta_strategies()
        logging.info("%s / %s", completed, FLAGS.gen_psro_iterations)
        logging.info(nash_probabilities)

    # NOTE(review): get_meta_game is read as an attribute, not called --
    # presumably a property on the solver; confirm.
    meta_game = solver.get_meta_game
    meta_probabilities = solver.get_and_update_meta_strategies()

    logging.info("%s meta probabilities", FLAGS.game)
    logging.info(meta_probabilities)
    logging.info("")
    logging.info("%s Meta Game Values", FLAGS.game)
    logging.info(meta_game)
    logging.info("")
def main(argv):
    """Seed everything, build the environment and oracle, and run the GPSRO loop.

    Results are written below
    ``<cwd>/<FLAGS.root_result_folder>/<run name>``, where the run name
    encodes the main flag values, the seed and a timestamp.

    Raises:
        app.UsageError: if extra command-line arguments are given.
        ValueError: if FLAGS.oracle_type is not one of the supported oracles.
    """
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    # Draw a seed when none was given, then seed numpy, random and TF with it.
    if FLAGS.seed is None:
        seed = np.random.randint(low=0, high=1e5)
    else:
        seed = FLAGS.seed
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    game = pyspiel.load_game_as_turn_based(
        FLAGS.game_name, {"players": pyspiel.GameParameter(FLAGS.n_players)})
    env = rl_environment.Environment(game, seed=seed)
    env.reset()

    if not os.path.exists(FLAGS.root_result_folder):
        os.makedirs(FLAGS.root_result_folder)

    run_name = "".join([
        'tuning_ars', str(FLAGS.iter_stop_dqn),
        '_', FLAGS.game_name, str(FLAGS.n_players),
        '_sims_', str(FLAGS.sims_per_entry),
        '_it_', str(FLAGS.gpsro_iterations),
        '_ep_', str(FLAGS.number_training_episodes),
        '_or_', FLAGS.oracle_type,
        '_arsnd_', str(FLAGS.num_directions),
        '_se_', str(seed),
        '_', datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'),
    ])
    checkpoint_dir = os.path.join(os.getcwd(), FLAGS.root_result_folder,
                                  run_name)

    writer = SummaryWriter(logdir=checkpoint_dir + '/log')
    if FLAGS.sbatch_run:
        # Deliberate redirect: all further stdout of the run goes to this file.
        sys.stdout = open(checkpoint_dir + '/stdout.txt', 'w+')

    # Initialize oracle and agents
    with tf.Session() as sess:
        if FLAGS.oracle_type == "DQN":
            oracle, agents = init_dqn_responder(sess, env)
        elif FLAGS.oracle_type == "PG":
            oracle, agents = init_pg_responder(sess, env)
        elif FLAGS.oracle_type == "BR":
            oracle, agents = init_br_responder(env)
        elif FLAGS.oracle_type == "ARS":
            oracle, agents = init_ars_responder(sess, env)
        elif FLAGS.oracle_type == "ARS_parallel":
            oracle, agents = init_ars_parallel_responder(sess, env, None)
        else:
            # Fail with a clear message instead of an unbound `oracle` below.
            raise ValueError(
                "Unsupported oracle type: {!r}".format(FLAGS.oracle_type))
        sess.run(tf.global_variables_initializer())
        gpsro_looper(env,
                     oracle,
                     agents,
                     writer,
                     quiesce=FLAGS.quiesce,
                     checkpoint_dir=checkpoint_dir,
                     seed=seed,
                     dqn_iters=FLAGS.iter_stop_dqn)

    writer.close()
def __init__(self, game, discount=1.0, chance_event_sampler=None,
             observation_type=None, include_full_state=False, **kwargs):
    """Constructor.

    Args:
      game: [string, pyspiel.Game] Open Spiel game name or game instance.
      discount: float, discount used in non-initial steps. Defaults to 1.0.
      chance_event_sampler: optional object with `sample_external_events`
        method to sample chance events.
      observation_type: what kind of observation to use. If not specified,
        will default to INFORMATION_STATE unless the game doesn't provide it.
      include_full_state: whether or not to include the full serialized
        OpenSpiel state in the observations (sometimes useful for debugging).
      **kwargs: dict, additional settings passed to the Open Spiel game. Only
        used when `game` is a string.

    Raises:
      ValueError: if the requested observation type is not supported by the
        game.
    """
    self._chance_event_sampler = chance_event_sampler or ChanceEventSampler()
    self._include_full_state = include_full_state

    if isinstance(game, str):
        if kwargs:
            game_settings = {
                key: pyspiel.GameParameter(val)
                for (key, val) in kwargs.items()
            }
            logging.info("Using game settings: %s", game_settings)
            self._game = pyspiel.load_game(game, game_settings)
        else:
            logging.info("Using game string: %s", game)
            self._game = pyspiel.load_game(game)
    else:  # pyspiel.Game or API-compatible object.
        logging.info("Using game instance: %s", game.get_type().short_name)
        self._game = game

    self._num_players = self._game.num_players()
    self._state = None
    self._should_reset = True

    # Discount returned at non-initial steps.
    self._discounts = [discount] * self._num_players

    # Determine what observation type to use.
    game_type = self._game.get_type()
    if observation_type is None:
        if game_type.provides_information_state_tensor:
            observation_type = ObservationType.INFORMATION_STATE
        else:
            observation_type = ObservationType.OBSERVATION

    # Check the requested observation type is supported. `game` may be a game
    # object rather than a string, so it is converted with str() before being
    # appended to the message; plain concatenation would raise TypeError
    # instead of the intended ValueError.
    if observation_type == ObservationType.OBSERVATION:
        if not game_type.provides_observation_tensor:
            raise ValueError(
                "observation_tensor not supported by " + str(game))
    elif observation_type == ObservationType.INFORMATION_STATE:
        if not game_type.provides_information_state_tensor:
            raise ValueError(
                "information_state_tensor not supported by " + str(game))

    self._use_observation = (observation_type == ObservationType.OBSERVATION)
def initialize_board(self):
    """Start a fresh 9x9 go game and reset the list of stored board planes."""
    # create go board
    go_game = pyspiel.load_game("go", {"board_size": pyspiel.GameParameter(9)})
    self.board_state = go_game.new_initial_state()
    # Seven all-zero 9x9 arrays.
    self.game_states = [np.zeros([9, 9]) for _ in range(7)]
def __init__(self, params={"state": pyspiel.GameParameter('')}):
    """Create the game, optionally starting from a given parent state.

    Args:
        params: game parameters. The optional "state" entry is a string
            GameParameter holding the state as json with keys players,
            rewards, board, grandes, king. An absent or empty "state" (or
            ``params`` being None or empty) selects ``_DEFAULT_STATE``.
    """
    # Normalize first so the lookups below also work for params=None.
    params = params or dict()
    # NOTE(review): `self` is passed explicitly to the already-bound
    # super().__init__ -- this looks unintended but is kept because the base
    # class is not visible here; confirm against the base signature.
    super().__init__(self, _GAME_TYPE, _GAME_INFO, params)

    # state input as json with keys players,rewards,board,grandes,king
    state_param = params.get("state", None)
    state = state_param.string_value() if state_param is not None else ''
    if state == '':
        state = _DEFAULT_STATE
    self._parent_game_state = state
def test_solve_small_pig(self):
    """Run value iteration on Pig with a win score of 20 and print the root value."""
    pig_game = pyspiel.load_game("pig", {"winscore": pyspiel.GameParameter(20)})
    state_values = value_iteration.value_iteration(
        pig_game, depth_limit=-1, threshold=1e-6, cyclic_game=True)
    root_key = str(pig_game.new_initial_state())
    print("Value of Pig(20): ", state_values[root_key])
def main(_):
    """Train NeuRD models with a fixed or adaptive alpha and print exploitability."""
    tensorflow.random.set_random_seed(int(FLAGS.random_seed))
    player_params = {"players": pyspiel.GameParameter(FLAGS.players)}
    game = pyspiel.load_game(FLAGS.game, player_params)

    # One network per player, all built from the same flag settings.
    models = [
        neurd.DeepNeurdModel(
            game,
            num_hidden_layers=FLAGS.num_hidden_layers,
            num_hidden_units=FLAGS.num_hidden_units,
            num_hidden_factors=FLAGS.num_hidden_factors,
            use_skip_connections=FLAGS.use_skip_connections,
            autoencode=FLAGS.autoencode)
        for _ in range(game.num_players())
    ]
    solver = neurd.CounterfactualNeurdSolver(game, FLAGS.alpha, models)

    def _train(model, data):
        # The Huber autoencoder loss is only used when autoencoding is on.
        if FLAGS.autoencode:
            autoencoder_loss = tf.compat.v1.losses.huber_loss
        else:
            autoencoder_loss = None
        neurd.train(model,
                    data,
                    batch_size=FLAGS.batch_size,
                    step_size=FLAGS.step_size,
                    alpha=FLAGS.alpha,
                    threshold=FLAGS.threshold,
                    autoencoder_loss=autoencoder_loss)

    # Latest measured exploitability; 100 until the first measurement.
    conv = 100
    for i in range(FLAGS.iterations):
        update_kwargs = {"alpha": FLAGS.alpha}
        if FLAGS.adaptive_alpha:
            # send i into the function to notify the adaptation of alpha
            update_kwargs.update(
                current_iteration=i,
                increase=FLAGS.increase,
                gamma=FLAGS.gamma,
                adaptive_policy=FLAGS.adaptive_policy,
                total_iteration=FLAGS.iterations,
                semi_percent=FLAGS.semi_percent,
                exploit_rate=FLAGS.exploit_rate,
                conv=conv,
                exp_exploit_rate=FLAGS.exp_exploit_rate)
        solver.evaluate_and_update_policy(_train, **update_kwargs)

        if i % FLAGS.print_freq == 0:
            conv = pyspiel.exploitability(game, solver.average_policy())
            print("Iteration {} exploitability {}".format(i, conv))
def main(_):
    """Run CFR for FLAGS.iterations steps, printing exploitability every FLAGS.print_freq steps."""
    player_params = {"players": pyspiel.GameParameter(FLAGS.players)}
    game = pyspiel.load_game(FLAGS.game, player_params)
    cfr_solver = cfr.CFRSolver(game)

    for i in range(FLAGS.iterations):
        cfr_solver.evaluate_and_update_policy()
        if i % FLAGS.print_freq != 0:
            continue
        conv = exploitability.exploitability(game, cfr_solver.average_policy())
        print("Iteration {} exploitability {}".format(i, conv))