예제 #1
0
def main(unused_argv):
    """Run the generalized PSRO solver on the flag-selected game and log results.

    Builds the game and an evolutionary-strategy oracle from command-line
    flags, runs ``FLAGS.gen_psro_iterations`` solver iterations (logging the
    meta-strategies returned after each one), then logs the final
    meta-strategies and the meta game.

    Args:
        unused_argv: Positional command-line arguments; not used.
    """
    game_params = {"players": pyspiel.GameParameter(FLAGS.num_players)}
    game = pyspiel.load_game(FLAGS.game, game_params)

    oracle_settings = dict(
        n_evolution_tests=FLAGS.n_evolution_tests,
        number_policies_sampled=FLAGS.number_policies_sampled,
        number_episodes_sampled=FLAGS.number_episodes_sampled,
        alpha=FLAGS.alpha,
        beta=FLAGS.beta,
    )
    oracle = optimization_oracle.EvolutionaryStrategyOracle(**oracle_settings)

    solver = generalized_psro.GenPSROSolver(
        game,
        oracle,
        sims_per_entry=FLAGS.sims_per_entry,
        meta_strategy_method='nash',
        rectify_training=FLAGS.rectify_training,
    )

    total_iterations = FLAGS.gen_psro_iterations
    for step in range(1, total_iterations + 1):
        solver.iteration()
        step_probabilities = solver.get_and_update_meta_strategies()
        # Progress line is 1-based: "<current> / <total>".
        logging.info("%s / %s", step, total_iterations)
        logging.info(step_probabilities)

    # NOTE(review): accessed without a call -- presumably a property on the
    # solver; confirm against the solver class in use.
    meta_game = solver.get_meta_game
    meta_probabilities = solver.get_and_update_meta_strategies()

    # Each report section is: header line, payload, blank separator line.
    report_sections = (
        ("%s meta probabilities", meta_probabilities),
        ("%s Meta Game Values", meta_game),
    )
    for header, payload in report_sections:
        logging.info(header, FLAGS.game)
        logging.info(payload)
        logging.info("")
예제 #2
0
def main(unused_argv):
    """Run the rectified Nash response solver on the flag-selected game.

    Builds the game and an evolutionary-strategy oracle from command-line
    flags, runs ``FLAGS.rnr_iterations`` solver iterations (printing the
    meta-strategies returned after each one), then prints the final
    meta-strategies and the meta game.

    Args:
        unused_argv: Positional command-line arguments; not used.
    """
    game = pyspiel.load_game(FLAGS.game)

    oracle_settings = dict(
        n_evolution_tests=FLAGS.n_evolution_tests,
        number_policies_sampled=FLAGS.number_policies_sampled,
        number_episodes_sampled=FLAGS.number_episodes_sampled,
        alpha=FLAGS.alpha,
        beta=FLAGS.beta,
    )
    oracle = optimization_oracle.EvolutionaryStrategyOracle(**oracle_settings)

    solver = rectified_nash_response.RNRSolver(
        game,
        oracle,
        sims_per_entry=FLAGS.sims_per_entry,
        rectify_training=FLAGS.rectify_nash,
    )

    total_iterations = FLAGS.rnr_iterations
    for step in range(1, total_iterations + 1):
        solver.iteration()
        step_probabilities = solver.get_and_update_meta_strategies()
        # Progress line is 1-based: "<current> / <total>".
        print("{} / {}".format(step, total_iterations))
        print(step_probabilities)

    # NOTE(review): accessed without a call -- presumably a property on the
    # solver; confirm against the solver class in use.
    meta_game = solver.get_meta_game
    final_probabilities = solver.get_and_update_meta_strategies()

    # Each report section is: titled header, payload, blank separator line.
    report_sections = (
        (" Nash probabilities", final_probabilities),
        (" Meta Game Values", meta_game),
    )
    for title_suffix, payload in report_sections:
        print(FLAGS.game + title_suffix)
        print(payload)
        print("")
예제 #3
0
    def test_rnr(self, game_name, rnr_iterations, sims_per_entry,
                 rectify_training, restrict_training,
                 meta_strategy_computation_method):
        """Smoke-run RNRSolver on a game and print its final results.

        Args:
            game_name: Name passed to ``pyspiel.load_game``.
            rnr_iterations: Number of times ``iteration()`` is called.
            sims_per_entry: Forwarded to the solver unchanged.
            rectify_training: Forwarded to the solver unchanged.
            restrict_training: Forwarded to the solver unchanged.
            meta_strategy_computation_method: Forwarded to the solver
                unchanged.
        """
        solver_options = dict(
            sims_per_entry=sims_per_entry,
            rectify_training=rectify_training,
            restrict_training=restrict_training,
            meta_strategy_computation_method=meta_strategy_computation_method,
        )
        game = pyspiel.load_game(game_name)
        # Small sample counts (2 policies, 2 episodes) as in the original.
        oracle = optimization_oracle.EvolutionaryStrategyOracle(
            number_policies_sampled=2,
            number_episodes_sampled=2,
        )
        solver = rectified_nash_response.RNRSolver(
            game, oracle, **solver_options)

        remaining = rnr_iterations
        for _ in range(remaining):
            solver.iteration()

        # NOTE(review): accessed without a call -- presumably a property on
        # the solver; confirm against the solver class in use.
        meta_game = solver.get_meta_game
        final_probabilities = solver.get_and_update_meta_strategies()

        # Each report section is: titled header, payload, blank separator.
        report_sections = (
            (" Nash probabilities", final_probabilities),
            (" Meta Game Values", meta_game),
        )
        for title_suffix, payload in report_sections:
            print(game_name + title_suffix)
            print(payload)
            print("")
  def test_gpsro(self, game_name, rnr_iterations, sims_per_entry,
                 number_players, rectify_training, training_strategy_selector,
                 meta_strategy_method):
    """Smoke-run GenPSROSolver on a game and log its final results.

    Args:
      game_name: Name passed to `pyspiel.load_game`.
      rnr_iterations: Number of times `iteration()` is called.
      sims_per_entry: Forwarded to the solver unchanged.
      number_players: Wrapped in a `pyspiel.GameParameter` and passed as the
        game's "players" parameter.
      rectify_training: Forwarded to the solver unchanged.
      training_strategy_selector: Forwarded to the solver unchanged.
      meta_strategy_method: Forwarded to the solver unchanged; also logged.
    """
    game_params = {"players": pyspiel.GameParameter(number_players)}
    game = pyspiel.load_game(game_name, game_params)
    # Small sample counts (2 policies, 2 episodes) as in the original.
    oracle = optimization_oracle.EvolutionaryStrategyOracle(
        number_policies_sampled=2,
        number_episodes_sampled=2)

    solver_options = dict(
        sims_per_entry=sims_per_entry,
        rectify_training=rectify_training,
        training_strategy_selector=training_strategy_selector,
        meta_strategy_method=meta_strategy_method)
    solver = generalized_psro.GenPSROSolver(game, oracle, **solver_options)

    for _ in range(rnr_iterations):
      solver.iteration()

    # NOTE(review): accessed without a call -- presumably a property on the
    # solver; confirm against the solver class in use.
    meta_game = solver.get_meta_game
    meta_probabilities = solver.get_and_update_meta_strategies()

    logging.info("%s %sP - %s", game_name, str(number_players),
                 meta_strategy_method)

    # Each report section is: header line, payload, blank separator line.
    report_sections = (
        ("Meta Strategies", meta_probabilities),
        ("Meta Game Values", meta_game),
    )
    for header, payload in report_sections:
      logging.info(header)
      logging.info(payload)
      logging.info("")