Example 1
def main(_):
    game = pyspiel.load_game(
        FLAGS.game,
        {"players": FLAGS.players},
    )

    if FLAGS.solver == "cfr":
        solver = pyspiel.CFRSolver(game)
    elif FLAGS.solver == "cfrplus":
        solver = pyspiel.CFRPlusSolver(game)
    elif FLAGS.solver == "cfrbr":
        solver = pyspiel.CFRBRSolver(game)

    for i in range(int(FLAGS.iterations / 2)):
        solver.evaluate_and_update_policy()
        print("Iteration {} exploitability: {:.6f}".format(
            i, pyspiel.exploitability(game, solver.average_policy())))

    print("Persisting the model...")
    with open("{}_solver.pickle".format(FLAGS.solver), "wb") as file:
        pickle.dump(solver, file, pickle.HIGHEST_PROTOCOL)

    print("Loading the model...")
    with open("{}_solver.pickle".format(FLAGS.solver), "rb") as file:
        loaded_solver = pickle.load(file)
    print("Exploitability of the loaded model: {:.6f}".format(
        pyspiel.exploitability(game, loaded_solver.average_policy())))

    for i in range(int(FLAGS.iterations / 2)):
        loaded_solver.evaluate_and_update_policy()
        print("Iteration {} exploitability: {:.6f}".format(
            int(FLAGS.iterations / 2) + i,
            pyspiel.exploitability(game, loaded_solver.average_policy())))
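Example 1 relies on a FLAGS object, a pickle import, and an application entry point that are not shown. Below is a minimal sketch of that setup, assuming absl-py; the flag names come from the example, but the defaults and help strings are illustrative assumptions only.

# Sketch of the setup Example 1 assumes (absl-py flags and entry point).
import pickle

from absl import app
from absl import flags
import pyspiel

FLAGS = flags.FLAGS
flags.DEFINE_string("game", "kuhn_poker", "Name of the game to load.")
flags.DEFINE_integer("players", 2, "Number of players.")
flags.DEFINE_string("solver", "cfr", "One of: cfr, cfrplus, cfrbr.")
flags.DEFINE_integer("iterations", 20, "Total number of solver iterations.")

if __name__ == "__main__":
    app.run(main)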
Example 2
def main(_):
    game = pyspiel.load_game(
        FLAGS.game,
        {"players": pyspiel.GameParameter(FLAGS.players)},
    )

    if FLAGS.sampling == "external":
        solver = pyspiel.ExternalSamplingMCCFRSolver(
            game,
            avg_type=pyspiel.MCCFRAverageType.FULL,
        )
    elif FLAGS.sampling == "outcome":
        solver = pyspiel.OutcomeSamplingMCCFRSolver(game)

    for i in range(int(FLAGS.iterations / 2)):
        solver.run_iteration()
        print("Iteration {} exploitability: {:.6f}".format(
            i, pyspiel.exploitability(game, solver.average_policy())))

    print("Persisting the model...")
    with open(MODEL_FILE_NAME.format(FLAGS.sampling), "wb") as file:
        pickle.dump(solver, file, pickle.HIGHEST_PROTOCOL)

    print("Loading the model...")
    with open(MODEL_FILE_NAME.format(FLAGS.sampling), "rb") as file:
        loaded_solver = pickle.load(file)
    print("Exploitability of the loaded model: {:.6f}".format(
        pyspiel.exploitability(game, loaded_solver.average_policy())))

    # Resume training from the solver that was just loaded.
    for i in range(int(FLAGS.iterations / 2)):
        loaded_solver.run_iteration()
        print("Iteration {} exploitability: {:.6f}".format(
            int(FLAGS.iterations / 2) + i,
            pyspiel.exploitability(game, loaded_solver.average_policy())))
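Example 2 also references a MODEL_FILE_NAME constant that is not shown. A plausible module-level definition, consistent with how it is formatted with FLAGS.sampling (the exact file name is an assumption):

MODEL_FILE_NAME = "{}_mccfr_solver.pickle"  # assumed; formatted with FLAGS.sampling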
Example 3
def main(_):
    game = pyspiel.load_game(FLAGS.game,
                             {"players": pyspiel.GameParameter(FLAGS.players)})

    models = []
    for _ in range(game.num_players()):
        models.append(
            neurd.DeepNeurdModel(
                game,
                num_hidden_layers=FLAGS.num_hidden_layers,
                num_hidden_units=FLAGS.num_hidden_units,
                num_hidden_factors=FLAGS.num_hidden_factors,
                use_skip_connections=FLAGS.use_skip_connections,
                autoencode=FLAGS.autoencode))

    solver = neurd.CounterfactualNeurdSolver(game, models)

    def _train(model, data):
        neurd.train(model,
                    data,
                    batch_size=FLAGS.batch_size,
                    step_size=FLAGS.step_size,
                    threshold=FLAGS.threshold,
                    autoencoder_loss=(tf.compat.v1.losses.huber_loss
                                      if FLAGS.autoencode else None))

    for i in range(FLAGS.iterations):
        solver.evaluate_and_update_policy(_train)
        if i % FLAGS.print_freq == 0:
            conv = pyspiel.exploitability(game, solver.average_policy())
            print("Iteration {} exploitability {}".format(i, conv))
Example 4
def test_exploitability_uniform_random_cc(self):
    """Checks the exploitability of the uniform random policy using C++."""
    game = pyspiel.load_game("python_kuhn_poker")
    test_policy = pyspiel.UniformRandomPolicy(game)
    expected_nash_conv = 11 / 12
    self.assertAlmostEqual(pyspiel.exploitability(game, test_policy),
                           expected_nash_conv / 2)
def main(_):
    tensorflow.random.set_random_seed(int(FLAGS.random_seed))
    game = pyspiel.load_game(FLAGS.game,
                             {"players": pyspiel.GameParameter(FLAGS.players)})

    # game = pyspiel.load_game(FLAGS.game)

    models = []
    for _ in range(game.num_players()):
        models.append(
            neurd.DeepNeurdModel(
                game,
                num_hidden_layers=FLAGS.num_hidden_layers,
                num_hidden_units=FLAGS.num_hidden_units,
                num_hidden_factors=FLAGS.num_hidden_factors,
                use_skip_connections=FLAGS.use_skip_connections,
                autoencode=FLAGS.autoencode))

    solver = neurd.CounterfactualNeurdSolver(game, FLAGS.alpha, models)

    def _train(model, data):
        neurd.train(model,
                    data,
                    batch_size=FLAGS.batch_size,
                    step_size=FLAGS.step_size,
                    alpha=FLAGS.alpha,
                    threshold=FLAGS.threshold,
                    autoencoder_loss=(tf.compat.v1.losses.huber_loss
                                      if FLAGS.autoencode else None))

    conv = 100  # Placeholder exploitability until the first evaluation below.

    # exploitabilities = []
    # start_time = time.time()
    for i in range(FLAGS.iterations):
        # Pass the iteration index so the solver can adapt alpha over time.

        if FLAGS.adaptive_alpha:
            solver.evaluate_and_update_policy(
                _train,
                current_iteration=i,
                alpha=FLAGS.alpha,
                increase=FLAGS.increase,
                gamma=FLAGS.gamma,
                adaptive_policy=FLAGS.adaptive_policy,
                total_iteration=FLAGS.iterations,
                semi_percent=FLAGS.semi_percent,
                exploit_rate=FLAGS.exploit_rate,
                conv=conv,
                exp_exploit_rate=FLAGS.exp_exploit_rate)
        else:
            solver.evaluate_and_update_policy(_train, alpha=FLAGS.alpha)

        if i % FLAGS.print_freq == 0:
            conv = pyspiel.exploitability(game, solver.average_policy())
            print("Iteration {} exploitability {}".format(i, conv))
def rcfr_train(unused_arg):
    tf.enable_eager_execution()
    game = pyspiel.load_game(FLAGS.game, {"players": pyspiel.GameParameter(2)})
    models = [
        rcfr.DeepRcfrModel(
            game,
            num_hidden_layers=1,
            num_hidden_units=64 if FLAGS.game == "leduc_poker" else 13,
            num_hidden_factors=1,
            use_skip_connections=True) for _ in range(game.num_players())
    ]
    patient = rcfr.RcfrSolver(game, models, False, True)
    exploit_history = list()
    exploit_idx = list()

    def _train(model, data):
        data = data.shuffle(1000)
        data = data.batch(12)
        #data = data.repeat(1)
        optimizer = tf.keras.optimizers.Adam(lr=0.005, amsgrad=True)
        for x, y in data:
            optimizer.minimize(
                lambda: tf.losses.huber_loss(y, model(x)),  # pylint: disable=cell-var-from-loop
                model.trainable_variables)

    agent_name = "rcfr"
    checkpoint = datetime.now()
    for iteration in range(FLAGS.episodes):
        if (iteration % 100) == 0:
            delta = datetime.now() - checkpoint
            conv = pyspiel.exploitability(game, patient.average_policy())
            exploit_idx.append(iteration)
            exploit_history.append(conv)
            print(
                "[RCFR] Iteration {} exploitability {} - {} seconds since last checkpoint"
                .format(iteration, conv, delta.seconds))
            checkpoint = datetime.now()
        patient.evaluate_and_update_policy(_train)

    # Save the exploitability trace; a context manager ensures the file is closed.
    with open(
            FLAGS.game + "_" + agent_name + "_" + str(FLAGS.episodes) + ".dat",
            "wb") as data_file:
        pickle.dump([exploit_idx, exploit_history], data_file)

    now = datetime.now()
    policy = patient.average_policy()

    # Write one CSV per player; pid is 0-based, so players are labelled from 1.
    for pid in range(game.num_players()):
        policy_to_csv(
            game, policy,
            f"policies/policy_{now.strftime('%m-%d-%Y_%H-%M')}_{agent_name}"
            f"_{pid + 1}_{FLAGS.episodes}episodes.csv")
Example 7
def main(_):
    game = pyspiel.load_game(FLAGS.game,
                             {"players": pyspiel.GameParameter(FLAGS.players)})

    models = []
    for _ in range(game.num_players()):
        models.append(
            rcfr.DeepRcfrModel(
                game,
                num_hidden_layers=FLAGS.num_hidden_layers,
                num_hidden_units=FLAGS.num_hidden_units,
                num_hidden_factors=FLAGS.num_hidden_factors,
                use_skip_connections=FLAGS.use_skip_connections))

    if FLAGS.buffer_size > 0:
        solver = rcfr.ReservoirRcfrSolver(
            game,
            models,
            FLAGS.buffer_size,
            truncate_negative=FLAGS.truncate_negative)
    else:
        solver = rcfr.RcfrSolver(game,
                                 models,
                                 truncate_negative=FLAGS.truncate_negative,
                                 bootstrap=FLAGS.bootstrap)

    def _train_fn(model, data):
        """Train `model` on `data`."""
        data = data.shuffle(FLAGS.batch_size * 10)
        data = data.batch(FLAGS.batch_size)
        data = data.repeat(FLAGS.num_epochs)

        optimizer = tf.keras.optimizers.Adam(lr=FLAGS.step_size, amsgrad=True)

        @tf.function
        def _train():
            for x, y in data:
                optimizer.minimize(
                    lambda: tf.compat.v1.losses.huber_loss(
                        y, model(x), delta=0.01),  # pylint: disable=cell-var-from-loop
                    model.trainable_variables)

        _train()

    # End of _train_fn

    for i in range(FLAGS.iterations):
        solver.evaluate_and_update_policy(_train_fn)
        if i % FLAGS.print_freq == 0:
            conv = pyspiel.exploitability(game, solver.average_policy())
            print("Iteration {} exploitability {}".format(i, conv))
def neurd_train(unused_arg):
    tf.enable_eager_execution()

    game = pyspiel.load_game(FLAGS.game, {"players": pyspiel.GameParameter(2)})

    models = []
    for _ in range(game.num_players()):
        models.append(
            neurd.DeepNeurdModel(game,
                                 num_hidden_layers=1,
                                 num_hidden_units=13,
                                 num_hidden_factors=8,
                                 use_skip_connections=True,
                                 autoencode=False))
    solver = neurd.CounterfactualNeurdSolver(game, models)

    def _train(model, data):
        neurd.train(model,
                    data,
                    batch_size=100,
                    step_size=1,
                    threshold=2,
                    autoencoder_loss=None)

    exploit_history = list()
    for ep in range(FLAGS.episodes):
        solver.evaluate_and_update_policy(_train)
        if ep % 100 == 0:
            conv = pyspiel.exploitability(game, solver.average_policy())
            exploit_history.append(conv)
            print("Iteration {} exploitability {}".format(ep, conv))

    now = datetime.now()
    policy = solver.average_policy()
    agent_name = "neurd"
    # Write one CSV per player; pid is 0-based, so players are labelled from 1.
    for pid in range(game.num_players()):
        policy_to_csv(
            game, policy,
            f"policies/policy_{now.strftime('%m-%d-%Y_%H-%M')}_{agent_name}"
            f"_{pid + 1}_{FLAGS.episodes}episodes.csv")

    plt.plot(range(len(exploit_history)), exploit_history)
    plt.ylim(0.01, 1)
    plt.yticks([1, 0.1, 0.01])
    plt.yscale("log")
    plt.xscale("log")
    plt.show()
Example 9
def run_iterations(game, solver, start_iteration=0):
    """Run iterations of MCCFR."""
    for i in range(int(FLAGS.iterations / 2)):
        solver.run_iteration()
        policy = solver.average_policy()
        exploitability = pyspiel.exploitability(game, policy)

        # We also compute NashConv to highlight an important API feature:
        # when using Monte Carlo sampling, the policy may not have a table
        # entry for every info state. Therefore, when calling nash_conv,
        # make sure the third argument, "use_state_get_policy", is set to True.
        # See https://github.com/deepmind/open_spiel/issues/500
        nash_conv = pyspiel.nash_conv(game, policy, True)

        print("Iteration {} nashconv: {:.6f} exploitability: {:.6f}".format(
            start_iteration + i, nash_conv, exploitability))
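Since run_iterations only runs half of FLAGS.iterations, it is presumably called twice around a save/load step, with start_iteration offsetting the printed iteration numbers. A hypothetical driver, mirroring the pickle round-trip from Example 2 (the file name and solver choice here are assumptions):

def main(_):
    game = pyspiel.load_game(
        FLAGS.game, {"players": pyspiel.GameParameter(FLAGS.players)})
    solver = pyspiel.OutcomeSamplingMCCFRSolver(game)

    # First half of training.
    run_iterations(game, solver)

    # Persist and reload the solver, as in Example 2.
    with open("mccfr_solver.pickle", "wb") as file:
        pickle.dump(solver, file, pickle.HIGHEST_PROTOCOL)
    with open("mccfr_solver.pickle", "rb") as file:
        loaded_solver = pickle.load(file)

    # Second half of training, continuing from the loaded solver.
    run_iterations(game, loaded_solver,
                   start_iteration=int(FLAGS.iterations / 2))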
Example 10
def main(_):
  game = pyspiel.load_game(
      FLAGS.game,
      {"players": pyspiel.GameParameter(FLAGS.players)},
  )

  if FLAGS.sampling == "external":
    solver = pyspiel.ExternalSamplingMCCFRSolver(
        game,
        avg_type=pyspiel.MCCFRAverageType.FULL,
    )
  elif FLAGS.sampling == "outcome":
    solver = pyspiel.OutcomeSamplingMCCFRSolver(game)

  for i in range(FLAGS.iterations):
    solver.run_iteration()
    print("Iteration {} exploitability: {:.6f}".format(
        i, pyspiel.exploitability(game, solver.average_policy())))