Example 1
import pyspiel
from open_spiel.python.algorithms import deep_cfr


# `game_name` is supplied by a parameterized decorator on the surrounding
# test class (e.g. 'kuhn_poker' or 'leduc_poker').
def test_deep_cfr_runs(self, game_name):
    game = pyspiel.load_game(game_name)
    deep_cfr_solver = deep_cfr.DeepCFRSolver(
        game,
        policy_network_layers=(8, 4),
        advantage_network_layers=(4, 2),
        num_iterations=2,
        num_traversals=2,
        learning_rate=1e-3,
        batch_size_advantage=8,
        batch_size_strategy=8,
        memory_capacity=1e7)
    # Smoke test: only checks that training runs end to end.
    deep_cfr_solver.solve()
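
As an illustrative follow-up (not part of the original test), the learned average policy can be queried directly once solve() has finished. This sketch continues from the variables above and descends to the first decision node:

state = game.new_initial_state()
while state.is_chance_node():
    # Skip chance nodes (e.g. the card deal in kuhn_poker) by taking the
    # first legal outcome; any outcome would do for illustration.
    state.apply_action(state.legal_actions()[0])
# action_probabilities(state) returns a dict {action: probability} for the
# player to act, the same callable passed to tabular_policy_from_callable.
action_probs = deep_cfr_solver.action_probabilities(state)
for action, prob in sorted(action_probs.items()):
    print(state.action_to_string(state.current_player(), action), prob)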
Example 2
import pyspiel
from open_spiel.python import policy
from open_spiel.python.algorithms import deep_cfr
from open_spiel.python.algorithms import exploitability


def test_matching_pennies_3p(self):
    # We don't expect Deep CFR to necessarily converge on 3-player games,
    # but it's nonetheless interesting to see this result.
    game = pyspiel.load_game_as_turn_based('matching_pennies_3p')
    deep_cfr_solver = deep_cfr.DeepCFRSolver(
        game,
        policy_network_layers=(16, 8),
        advantage_network_layers=(32, 16),
        num_iterations=2,
        num_traversals=2,
        learning_rate=1e-3,
        batch_size_advantage=8,
        batch_size_strategy=8,
        memory_capacity=1e7)
    deep_cfr_solver.solve()
    # Tabulate the learned average policy and measure its NashConv
    # (the total improvement available to best-responding players).
    conv = exploitability.nash_conv(
        game,
        policy.tabular_policy_from_callable(
            game, deep_cfr_solver.action_probabilities))
    print('Deep CFR in Matching Pennies 3p. NashConv: {}'.format(conv))
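
For context when reading that number, here is a minimal baseline sketch (not in the original): the NashConv of the uniform random policy on the same game. policy.TabularPolicy(game) is initialized to uniform over legal actions.

import pyspiel
from open_spiel.python import policy
from open_spiel.python.algorithms import exploitability

game = pyspiel.load_game_as_turn_based('matching_pennies_3p')
uniform_policy = policy.TabularPolicy(game)  # uniform over legal actions
print('Uniform policy NashConv: {}'.format(
    exploitability.nash_conv(game, uniform_policy)))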
Example 3
from absl import logging

import pyspiel
from open_spiel.python import policy
from open_spiel.python.algorithms import deep_cfr
from open_spiel.python.algorithms import expected_game_score
from open_spiel.python.algorithms import exploitability


# FLAGS come from absl.flags definitions; see the wiring sketch below.
def main(unused_argv):
    logging.info("Loading %s", FLAGS.game_name)
    game = pyspiel.load_game(FLAGS.game_name)
    deep_cfr_solver = deep_cfr.DeepCFRSolver(
        game,
        policy_network_layers=(64, 64, 64),
        advantage_network_layers=(64, 64, 64),
        num_iterations=FLAGS.num_iterations,
        num_traversals=FLAGS.num_traversals,
        learning_rate=1e-3,
        batch_size_advantage=2048,
        batch_size_strategy=2048,
        memory_capacity=1e7,
        policy_network_train_steps=5000,
        advantage_network_train_steps=750,
        reinitialize_advantage_networks=True)
    # solve() returns the policy network plus the training losses.
    _, advantage_losses, policy_loss = deep_cfr_solver.solve()
    for player, losses in advantage_losses.items():
        # Log only the first and last two losses to keep the output short.
        logging.info("Advantage for player %d: %s", player,
                     losses[:2] + ["..."] + losses[-2:])
        logging.info("Advantage Buffer Size for player %d: %d", player,
                     len(deep_cfr_solver.advantage_buffers[player]))
    logging.info("Strategy Buffer Size: %d",
                 len(deep_cfr_solver.strategy_buffer))
    logging.info("Final policy loss: %s", policy_loss)

    # Tabulate the learned average policy so it can be evaluated exactly.
    average_policy = policy.tabular_policy_from_callable(
        game, deep_cfr_solver.action_probabilities)

    conv = exploitability.nash_conv(game, average_policy)
    logging.info("Deep CFR in '%s' - NashConv: %s", FLAGS.game_name, conv)

    # Expected returns under the average policy (assumes a 2-player game).
    average_policy_values = expected_game_score.policy_value(
        game.new_initial_state(), [average_policy] * 2)
    print("Computed player 0 value: {}".format(average_policy_values[0]))
    print("Computed player 1 value: {}".format(average_policy_values[1]))