# Imports assumed by the examples below (OpenSpiel's TF1-based Deep CFR);
# under TF2, import tensorflow.compat.v1 as tf instead.
from absl import app
from absl import flags
from absl import logging
import six
import tensorflow as tf

from open_spiel.python import policy
from open_spiel.python.algorithms import deep_cfr
from open_spiel.python.algorithms import exploitability
from open_spiel.python.algorithms import expected_game_score
import pyspiel

FLAGS = flags.FLAGS


def main(unused_argv):
    logging.info("Loading %s", FLAGS.game_name)
    game = pyspiel.load_game(FLAGS.game_name)
    with tf.Session() as sess:
        deep_cfr_solver = deep_cfr.DeepCFRSolver(
            sess,
            game,
            policy_network_layers=(32, 32),
            advantage_network_layers=(16, 16),
            num_iterations=FLAGS.num_iterations,
            num_traversals=FLAGS.num_traversals,
            learning_rate=1e-3,
            batch_size_advantage=None,
            batch_size_strategy=None,
            memory_capacity=1e7)
        sess.run(tf.global_variables_initializer())
        _, advantage_losses, policy_loss = deep_cfr_solver.solve()
        for player, losses in six.iteritems(advantage_losses):
            logging.info("Advantage for player %d: %s", player,
                         losses[:2] + ["..."] + losses[-2:])
            logging.info("Advantage Buffer Size for player %s: '%s'", player,
                         len(deep_cfr_solver.advantage_buffers[player]))
        logging.info("Strategy Buffer Size: '%s'",
                     len(deep_cfr_solver.strategy_buffer))
        logging.info("Final policy loss: '%s'", policy_loss)
        conv = exploitability.nash_conv(
            game,
            policy.PolicyFromCallable(game,
                                      deep_cfr_solver.action_probabilities))
        logging.info("Deep CFR in '%s' - NashConv: %s", FLAGS.game_name, conv)
Example #2
# Assumes: from open_spiel.python.policy import tabular_policy_from_callable.
# policy_handler is a project-local helper module for saving tabular policies.
def DEEPCFR_Solving(game, iterations, save_every=0, save_prefix='base', num_travers=40,
                    lr=1e-3, policy_layers=(32, 32), advantage_layers=(16, 16)):
    def save_deepcfr():  # print diagnostics and save the current tabular policy
        print("---------iteration " + str(it) + "----------")
        for player, losses in six.iteritems(advantage_losses):
            print("Advantage for player ", player, losses)
            print("Advantage Buffer Size for player", player,
                  len(deep_cfr_solver.advantage_buffers[player]))
        print("Strategy Buffer Size: ",
              len(deep_cfr_solver.strategy_buffer))
        print("policy loss: ", policy_loss)
        # Convert the solver's callable policy into a tabular policy, then to a
        # dict mapping state keys to action-probability arrays.
        tabular_policy = tabular_policy_from_callable(game, deep_cfr_solver.action_probabilities)
        policy_dict = dict(zip(tabular_policy.state_lookup, tabular_policy.action_probability_array))
        # save under map (save_prefix)_(num_travers)
        return policy_handler.save_to_tabular_policy(game, policy_dict, "policies/DEEPCFR/{}/{}".format(
            save_prefix + "_" + str(num_travers), it))

    with tf.Session() as sess:
        # Set num_iterations=1 and call solve() repeatedly so we can save and
        # evaluate intermediate policies between iterations.
        deep_cfr_solver = deep_cfr.DeepCFRSolver(sess, game, policy_network_layers=policy_layers,
                                                 advantage_network_layers=advantage_layers, num_iterations=1,
                                                 num_traversals=num_travers, learning_rate=lr)
        sess.run(tf.global_variables_initializer())

        for it in range(iterations + 1):
            _, advantage_losses, policy_loss = deep_cfr_solver.solve()
            if save_every != 0 and it % save_every == 0:
                save_deepcfr()
        return save_deepcfr()
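A minimal usage sketch for DEEPCFR_Solving; the game and parameter values here are illustrative assumptions, not from the source:

game = pyspiel.load_game("kuhn_poker")
# Runs 100 solver iterations, saving the tabular policy every 25 iterations
# under policies/DEEPCFR/kuhn_40/.
final_policy = DEEPCFR_Solving(game, iterations=100, save_every=25,
                               save_prefix="kuhn", num_travers=40)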
Example #3
# Test method excerpted from a test class; game_name is supplied by a
# parameterized-test decorator in the original file.
def test_deep_cfr_runs(self, game_name):
     game = pyspiel.load_game(game_name)
     with tf.Session() as sess:
         deep_cfr_solver = deep_cfr.DeepCFRSolver(
             sess,
             game,
             policy_network_layers=(8, 4),
             advantage_network_layers=(4, 2),
             num_iterations=2,
             num_traversals=2,
             learning_rate=1e-3,
             batch_size_advantage=None,
             batch_size_strategy=None,
             memory_capacity=1e7)
         sess.run(tf.global_variables_initializer())
         deep_cfr_solver.solve()
Example #4
def main(unused_argv):
    logging.info("Loading %s", FLAGS.game_name)
    game = pyspiel.load_game(FLAGS.game_name)
    with tf.Session() as sess:
        deep_cfr_solver = deep_cfr.DeepCFRSolver(
            sess,
            game,
            policy_network_layers=(16, ),
            advantage_network_layers=(16, ),
            num_iterations=FLAGS.num_iterations,
            num_traversals=FLAGS.num_traversals,
            learning_rate=1e-3,
            batch_size_advantage=128,
            batch_size_strategy=1024,
            memory_capacity=1e7,
            policy_network_train_steps=400,
            advantage_network_train_steps=20,
            reinitialize_advantage_networks=False)
        sess.run(tf.global_variables_initializer())
        _, advantage_losses, policy_loss = deep_cfr_solver.solve()
        for player, losses in six.iteritems(advantage_losses):
            logging.info("Advantage for player %d: %s", player,
                         losses[:2] + ["..."] + losses[-2:])
            logging.info("Advantage Buffer Size for player %s: '%s'", player,
                         len(deep_cfr_solver.advantage_buffers[player]))
        logging.info("Strategy Buffer Size: '%s'",
                     len(deep_cfr_solver.strategy_buffer))
        logging.info("Final policy loss: '%s'", policy_loss)

        average_policy = policy.tabular_policy_from_callable(
            game, deep_cfr_solver.action_probabilities)

        conv = exploitability.nash_conv(game, average_policy)
        logging.info("Deep CFR in '%s' - NashConv: %s", FLAGS.game_name, conv)

        average_policy_values = expected_game_score.policy_value(
            game.new_initial_state(), [average_policy] * 2)
        print("Computed player 0 value: {}".format(average_policy_values[0]))
        print("Expected player 0 value: {}".format(-1 / 18))
        print("Computed player 1 value: {}".format(average_policy_values[1]))
        print("Expected player 1 value: {}".format(1 / 18))
Example #5
# Test method excerpted from a test class in the original file.
def test_matching_pennies_3p(self):
   # We don't expect Deep CFR to necessarily converge on 3-player games but
   # it's nonetheless interesting to see this result.
    # matching_pennies_3p is a simultaneous-move game; load it as turn-based
    # so the sequential Deep CFR solver can traverse it.
    game = pyspiel.load_game_as_turn_based('matching_pennies_3p')
   with tf.Session() as sess:
     deep_cfr_solver = deep_cfr.DeepCFRSolver(
         sess,
         game,
         policy_network_layers=(16, 8),
         advantage_network_layers=(32, 16),
         num_iterations=2,
         num_traversals=2,
         learning_rate=1e-3,
         batch_size_advantage=None,
         batch_size_strategy=None,
         memory_capacity=1e7)
     sess.run(tf.global_variables_initializer())
     deep_cfr_solver.solve()
     conv = exploitability.nash_conv(
         game,
         policy.tabular_policy_from_callable(
             game, deep_cfr_solver.action_probabilities))
     print('Deep CFR in Matching Pennies 3p. NashConv: {}'.format(conv))
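To inspect what any of these solvers learned, the same action_probabilities callable can be queried directly at a state. A minimal sketch (run inside the solver's tf.Session scope; the traversal to a decision node is illustrative):

state = game.new_initial_state()
# Step past chance nodes (e.g. card deals) to reach a player decision node.
while state.is_chance_node():
    state.apply_action(state.legal_actions()[0])
# Returns an {action: probability} dict for the player to act at this state.
print(deep_cfr_solver.action_probabilities(state))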