def test_kuhn_cfr_checkpointing(self):
    """Check that CFR training on Kuhn poker reports checkpoints in order.

    Trains for 60 iterations with ``weight_delay=15`` and
    ``checkpoint_iterations=15`` and asserts that the checkpoint callback
    fires exactly three times, each time with a non-``None`` game tree and
    with indices 0, 1, 2 in that order.
    """
    # Indices received so far; its length doubles as the next expected index.
    seen_checkpoint_indices = []

    def checkpoint_callback(game_tree, checkpoint_index, iterations):
        self.assertTrue(game_tree is not None)
        self.assertEqual(checkpoint_index, len(seen_checkpoint_indices))
        seen_checkpoint_indices.append(checkpoint_index)

    kuhn_game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
    solver = Cfr(kuhn_game, show_progress=False)
    solver.train(
        60,
        weight_delay=15,
        checkpoint_iterations=15,
        checkpoint_callback=checkpoint_callback)

    self.assertEqual(len(seen_checkpoint_indices), 3)
def create_agent_strategy(
        game_file_path,
        tilt_action,
        tilt_type,
        tilt_probability,
        cfr_iterations=2000,
        cfr_weight_delay=700,
        show_progress=True):
    """Train a CFR strategy for a game and derive an agent strategy from it.

    Args:
        game_file_path: Path of the game definition file, read with
            ``acpc.read_game_file``.
        tilt_action: Forwarded unchanged to
            ``create_agent_strategy_from_trained_strategy``.
        tilt_type: Forwarded unchanged to
            ``create_agent_strategy_from_trained_strategy``.
        tilt_probability: Forwarded unchanged to
            ``create_agent_strategy_from_trained_strategy``.
        cfr_iterations: First positional argument passed to ``Cfr.train``.
        cfr_weight_delay: Second positional argument passed to ``Cfr.train``.
        show_progress: Passed to the ``Cfr`` constructor.

    Returns:
        Whatever ``create_agent_strategy_from_trained_strategy`` returns for
        the freshly trained game tree.
    """
    solver = Cfr(
        acpc.read_game_file(game_file_path),
        show_progress=show_progress)
    solver.train(cfr_iterations, cfr_weight_delay)
    trained_tree = solver.game_tree

    # NOTE(review): the meaning of the trailing positional ``True`` is defined
    # by create_agent_strategy_from_trained_strategy, which is not visible
    # here -- confirm against that function's signature.
    return create_agent_strategy_from_trained_strategy(
        game_file_path,
        trained_tree,
        tilt_action,
        tilt_type,
        tilt_probability,
        True)
def test_leduc_cfr_works(self):
    """Smoke test: a short CFR run on the Leduc poker game must not raise.

    Makes no assertions; it only runs 5 training iterations with
    ``weight_delay=2`` and progress output disabled.
    """
    Cfr(
        acpc.read_game_file(LEDUC_POKER_GAME_FILE_PATH),
        show_progress=False,
    ).train(5, weight_delay=2)
def test_kuhn_bigdeck_2round_cfr_works(self):
    """Smoke test: a short CFR run on the big-deck 2-round Kuhn game must not raise.

    Makes no assertions; it only runs 5 training iterations with
    ``weight_delay=2`` and progress output disabled.
    """
    Cfr(
        acpc.read_game_file(KUHN_BIG_DECK_2ROUND_POKER_GAME_FILE_PATH),
        show_progress=False,
    ).train(5, weight_delay=2)
def train_and_show_results(self, test_spec):
    """Train CFR for one test specification and report its exploitability.

    Runs CFR on the game named by ``test_spec``, evaluates exploitability at
    every checkpoint, prints the final (and minimum) exploitability, saves a
    plot of exploitability against training iterations and writes the final
    strategy to a file. Both outputs go under ``FIGURES_FOLDER``.

    Args:
        test_spec: Mapping with the keys ``'game_file_path'``,
            ``'training_iterations'``, ``'weight_delay'``,
            ``'checkpoint_iterations'`` and ``'title'`` (the plot title).
    """
    game = acpc.read_game_file(test_spec['game_file_path'])
    exploitability = Exploitability(game)

    # Per-checkpoint series for the plot. np.append without an axis always
    # yields a flat array, so both start out as empty 1-D arrays.
    iteration_counts = np.zeros(0)
    exploitability_values = np.zeros(0)
    best_exploitability = float("inf")
    best_exploitability_strategy = GameTreeBuilder(
        game, StrategyTreeNodeProvider()).build_tree()

    def checkpoint_callback(game_tree, checkpoint_index, iterations):
        nonlocal iteration_counts
        nonlocal exploitability_values
        nonlocal best_exploitability
        nonlocal best_exploitability_strategy

        iteration_counts = np.append(iteration_counts, iterations)

        if CHECK_STRATEGY_CORRECTNESS:
            self.assertTrue(is_correct_strategy(game_tree))

        exploitability_value = exploitability.evaluate(game_tree)
        exploitability_values = np.append(
            exploitability_values, exploitability_value)
        if (COLLECT_MIN_EXPLOITABILITY
                and exploitability_value < best_exploitability):
            best_exploitability = exploitability_value
            # Presumably snapshots game_tree into the pre-built tree so the
            # best strategy survives further training -- confirm the
            # argument order against copy_strategy.
            copy_strategy(best_exploitability_strategy, game_tree)

    cfr = Cfr(game)
    cfr.train(
        test_spec['training_iterations'],
        weight_delay=test_spec['weight_delay'],
        checkpoint_iterations=test_spec['checkpoint_iterations'],
        checkpoint_callback=checkpoint_callback,
        minimal_action_probability=0.00006)

    best_response = BestResponse(game).solve(cfr.game_tree)
    player_utilities, _ = PlayerUtility(game).evaluate(
        cfr.game_tree, best_response)
    print(player_utilities.tolist())
    print('Exploitability: %s' % exploitability.evaluate(cfr.game_tree))

    if COLLECT_MIN_EXPLOITABILITY:
        min_exploitability = exploitability.evaluate(
            best_exploitability_strategy)
        min_exploitability_best_response = BestResponse(game).solve(
            best_exploitability_strategy)
        min_exploitability_player_utilities, _ = PlayerUtility(
            game).evaluate(best_exploitability_strategy,
                           min_exploitability_best_response)
        # The stored best strategy must reproduce the smallest value that
        # was recorded at the checkpoints.
        self.assertEqual(min_exploitability, exploitability_values.min())
        print('Minimum exploitability: %s' % min_exploitability)
        print('Minimum exploitability player utilities: %s' %
              min_exploitability_player_utilities.tolist())
    else:
        print('Minimum exploitability: %s' % exploitability_values.min())

    figure = plt.figure(dpi=160)
    plt.plot(iteration_counts, exploitability_values, linewidth=0.8)
    plt.title(test_spec['title'])
    plt.xlabel('Training iterations')
    plt.ylabel('Strategy exploitability [mbb/g]')
    plt.grid()

    # Derive the game name from the file name itself rather than from a
    # fixed path depth and a fixed-length extension, so any directory
    # layout (e.g. './games/x.game' or nested folders) works.
    game_name = os.path.splitext(
        os.path.basename(test_spec['game_file_path']))[0]
    figure_output_path = '%s/%s(it:%s-st:%s).png' % (
        FIGURES_FOLDER,
        game_name,
        test_spec['training_iterations'],
        test_spec['checkpoint_iterations'])

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    figures_directory = os.path.dirname(figure_output_path)
    os.makedirs(figures_directory, exist_ok=True)

    plt.savefig(figure_output_path)
    # Release the figure; pyplot otherwise keeps every figure alive for the
    # whole process, which accumulates when many specs are run.
    plt.close(figure)

    write_strategy_to_file(
        cfr.game_tree,
        '%s/%s(it:%s).strategy' % (
            FIGURES_FOLDER, game_name, test_spec['training_iterations']),
        [
            '# Game utility against best response: %s'
            % player_utilities.tolist()
        ])
strategy_file_lines_sorted = sorted(strategy_file_lines) progress.update(1) except NameError: strategy_file_lines_sorted = sorted(strategy_file_lines) strategy_file_lines_sorted = ['# Training iterations: %s\n' % iterations ] + strategy_file_lines_sorted try: with tqdm(total=1) as progress: progress.set_description('Writing strategy file') _write_to_output_file(output_path, strategy_file_lines_sorted) progress.update(1) except NameError: _write_to_output_file(output_path, strategy_file_lines_sorted) if __name__ == "__main__": if len(sys.argv) < 4: print("Usage {game_file_path} {iterations} {strategy_output_path}") sys.exit(1) iterations = int(sys.argv[2]) output_path = sys.argv[3] game = acpc.read_game_file(sys.argv[1]) cfr = Cfr(game) cfr.train(iterations) _write_strategy(cfr.game_tree, iterations, output_path)