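# Imports shared by the test methods below. The standard-library and
# third-party imports follow directly from how the names are used in
# this excerpt; the project-local helpers (acpc, Exploitability,
# create_agent_strategy, read_strategy_from_file, ...) come from this
# repository's own modules, whose import paths are not shown here.
import math
import os
import random

import matplotlib.pyplot as plt
import numpy as np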
def test_leduc_multiply_action_tilted_agent_not_crashing(self):
    strategy = create_agent_strategy(
        LEDUC_POKER_GAME_FILE_PATH,
        Action.FOLD,
        TiltType.MULTIPLY,
        0.1,
        cfr_iterations=5,
        cfr_weight_delay=2,
        show_progress=False)
    self.assertTrue(is_correct_strategy(strategy))
def test_kuhn_action_tilted_agent_not_crashing(self):
    strategy = create_agent_strategy(
        KUHN_POKER_GAME_FILE_PATH,
        Action.RAISE,
        TiltType.ADD,
        0.2,
        cfr_iterations=20,
        cfr_weight_delay=2,
        show_progress=False)
    self.assertTrue(is_correct_strategy(strategy))
def checkpoint_callback(game_tree, checkpoint_index, iterations):
    # The checkpoint arrays and the best-strategy trackers live in the
    # enclosing scope and are reassigned here, hence the nonlocals
    nonlocal iteration_counts
    nonlocal exploitability_values
    nonlocal best_exploitability
    nonlocal best_exploitability_strategy

    iteration_counts = np.append(iteration_counts, iterations)
    if CHECK_STRATEGY_CORRECTNESS:
        self.assertTrue(is_correct_strategy(game_tree))
    exploitability_value = exploitability.evaluate(game_tree)
    exploitability_values = np.append(
        exploitability_values, exploitability_value)
    # Keep a copy of the least exploitable strategy seen so far
    if COLLECT_MIN_EXPLOITABILITY \
            and exploitability_value < best_exploitability:
        best_exploitability = exploitability_value
        copy_strategy(best_exploitability_strategy, game_tree)
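
# A minimal sketch of the enclosing scope this checkpoint callback
# expects and of how it is handed to a trainer, all inside the same
# enclosing test method. The train(iterations, checkpoint_iterations=...,
# checkpoint_callback=...) signature matches the RNR training call
# further below; the Cfr trainer class itself is an assumption of this
# sketch, not confirmed by this excerpt:
game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
exploitability = Exploitability(game)

iteration_counts = np.empty(0)
exploitability_values = np.empty(0)
best_exploitability = float('inf')
best_exploitability_strategy = GameTreeBuilder(
    game, StrategyTreeNodeProvider()).build_tree()

Cfr(game).train(
    1000,
    checkpoint_iterations=10,
    checkpoint_callback=checkpoint_callback)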
def train_and_show_results(self, test_spec):
    game_file_path = test_spec['game_file_path']
    game = acpc.read_game_file(game_file_path)

    base_strategy, _ = read_strategy_from_file(
        game_file_path,
        test_spec['base_strategy_path'])

    opponent = test_spec['opponent']
    opponent_strategy = create_agent_strategy_from_trained_strategy(
        game_file_path,
        base_strategy,
        opponent[1],
        opponent[2],
        opponent[3])

    strategy, exploitability, p = RnrParameterOptimizer(game).train(
        opponent_strategy,
        test_spec['exploitability'],
        test_spec['max_delta'])

    self.assertIsNotNone(strategy)
    self.assertTrue(is_correct_strategy(strategy))
    print('Final exploitability is %s with p of %s' % (exploitability, p))
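
# Hypothetical example of invoking train_and_show_results above; only
# the spec keys and the opponent tuple layout (name, action, tilt type,
# tilt amount) are taken from the method itself, the concrete values
# and the game file path are illustrative:
def test_kuhn_rnr(self):
    self.train_and_show_results({
        'game_file_path': 'games/kuhn.limit.2p.game',
        'base_strategy_path': 'strategies/kuhn.limit.2p-equilibrium.strategy',
        'opponent': ('tilted_agent', Action.CALL, TiltType.ADD, -0.5),
        'exploitability': 250,
        'max_delta': 10,
    })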
def test_kuhn_action_minus_tilted_agent(self):
    kuhn_equilibrium, _ = read_strategy_from_file(
        KUHN_POKER_GAME_FILE_PATH,
        'strategies/kuhn.limit.2p-equilibrium.strategy')

    game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
    exploitability = Exploitability(game)

    tilted_agent_strategy = create_agent_strategy_from_trained_strategy(
        KUHN_POKER_GAME_FILE_PATH,
        kuhn_equilibrium,
        Action.CALL,
        TiltType.ADD,
        -0.5)
    self.assertTrue(is_correct_strategy(tilted_agent_strategy))
    self.assertFalse(
        is_strategies_equal(kuhn_equilibrium, tilted_agent_strategy))

    # Tilting the equilibrium should strictly increase its exploitability
    equilibrium_exploitability = exploitability.evaluate(kuhn_equilibrium)
    call_add_tilted_exploitability = exploitability.evaluate(
        tilted_agent_strategy)
    self.assertGreater(
        call_add_tilted_exploitability, equilibrium_exploitability)
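
# A minimal sketch of the two call forms of Exploitability.evaluate as
# they appear in these tests; the two-argument semantics are inferred
# from the 'Utility against opponent strategy' plot label further below,
# not confirmed by this excerpt:
game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
exp = Exploitability(game)
strategy, _ = read_strategy_from_file(
    KUHN_POKER_GAME_FILE_PATH,
    'strategies/kuhn.limit.2p-equilibrium.strategy')

# One argument: exploitability of the given strategy [mbb/g]
print(exp.evaluate(strategy))

# Two arguments: utility of the second strategy played against the first
print(exp.evaluate(strategy, strategy))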
def train_and_show_results(self, test_spec):
    game_file_path = test_spec['game_file_path']
    game = acpc.read_game_file(game_file_path)

    base_strategy, _ = read_strategy_from_file(
        game_file_path,
        test_spec['base_strategy_path'])

    agents = test_spec['opponent_tilt_types']
    num_agents = len(agents)

    game_name = game_file_path.split('/')[1][:-5]
    overwrite_figure = test_spec.get('overwrite_figure', False)
    figure_path = get_new_path(
        '%s/%s(it:%s-st:%s)' % (
            FIGURES_FOLDER,
            game_name,
            test_spec['training_iterations'],
            test_spec['checkpoint_iterations']),
        '.png',
        overwrite_figure)
    create_path_dirs(figure_path)

    exp = Exploitability(game)

    # Checkpoints only begin after the first 700 iterations, hence the offset
    checkpoints_count = math.ceil(
        (test_spec['training_iterations'] - 700)
        / test_spec['checkpoint_iterations'])
    iteration_counts = np.zeros(checkpoints_count)
    exploitability_values = np.zeros([num_agents, checkpoints_count])
    vs_opponent_utility_values = np.zeros([num_agents, checkpoints_count])
    opponent_exploitability_values = np.zeros(num_agents)
    for i, agent in enumerate(agents):
        print('%s/%s' % (i + 1, num_agents))

        opponent_strategy = create_agent_strategy_from_trained_strategy(
            game_file_path,
            base_strategy,
            agent[0],
            agent[1],
            agent[2])
        self.assertTrue(is_correct_strategy(opponent_strategy))

        if test_spec.get('print_opponent_strategies', False):
            write_strategy_to_file(
                opponent_strategy,
                '%s/%s.strategy' % (
                    os.path.dirname(figure_path),
                    get_agent_name(agent)))

        if test_spec.get('print_best_responses', False):
            opponent_best_response = BestResponse(game).solve(
                opponent_strategy)
            write_strategy_to_file(
                opponent_best_response,
                '%s/%s-best_response.strategy' % (
                    os.path.dirname(figure_path),
                    get_agent_name(agent)))

        if PLOT_OPPONENT_EXPLOITABILITY:
            opponent_exploitability = exp.evaluate(opponent_strategy)
            opponent_exploitability_values[i] = opponent_exploitability
            print('%s exploitability: %s' % (
                get_agent_name(agent),
                opponent_exploitability))

        def checkpoint_callback(game_tree, checkpoint_index, iterations):
            # Checkpoint iteration numbers are the same for every agent,
            # so record them only on the first pass
            if i == 0:
                iteration_counts[checkpoint_index] = iterations
            self.assertTrue(is_correct_strategy(game_tree))
            exploitability_values[i, checkpoint_index] = exp.evaluate(
                game_tree)
            vs_opponent_utility_values[i, checkpoint_index] = exp.evaluate(
                opponent_strategy, game_tree)

        rnr = RestrictedNashResponse(game, opponent_strategy, agent[3])
        rnr.train(
            test_spec['training_iterations'],
            checkpoint_iterations=test_spec['checkpoint_iterations'],
            checkpoint_callback=checkpoint_callback)

        if test_spec.get('print_response_strategies', False):
            write_strategy_to_file(
                rnr.game_tree,
                '%s-%s-p=%s.strategy' % (
                    figure_path[:-len('.png')],
                    get_agent_name(agent),
                    agent[3]))

        print('Vs opponent value: %s'
              % exp.evaluate(opponent_strategy, rnr.game_tree))
        print('Exploitability: %s' % exp.evaluate(rnr.game_tree))

        # Re-plot the figure after each agent so that partial results
        # are saved while the test is still running
        plt.figure(dpi=300)
        ax = plt.subplot(111)
        for j in range(i + 1):
            p = plt.plot(
                iteration_counts,
                exploitability_values[j],
                label='%s-p=%s exploitability' % (
                    get_agent_name(agents[j]),
                    agents[j][3]),
                linewidth=LINE_WIDTH)
            plt.plot(
                iteration_counts,
                vs_opponent_utility_values[j],
                '--',
                label='Utility against opponent strategy',
                color=p[0].get_color(),
                linewidth=LINE_WIDTH)
            if PLOT_OPPONENT_EXPLOITABILITY:
                plt.plot(
                    iteration_counts,
                    np.ones(checkpoints_count)
                    * opponent_exploitability_values[j],
                    ':',
                    label='Opponent exploitability',
                    color=p[0].get_color(),
                    linewidth=LINE_WIDTH)

        plt.title(test_spec['title'])
        plt.xlabel('Training iterations')
        plt.ylabel('Strategy exploitability [mbb/g]')
        plt.grid()

        # Interleave the legend entries so that each legend column
        # contains one plot type across all agents (fresh loop variables
        # avoid shadowing the enclosing i and j)
        handles, labels = ax.get_legend_handles_labels()
        new_handles = []
        new_labels = []
        for plot_type in range(PLOT_COUNT_PER_AGENT):
            for handle_index in range(
                    plot_type, len(handles), PLOT_COUNT_PER_AGENT):
                new_handles.append(handles[handle_index])
                new_labels.append(labels[handle_index])
        lgd = plt.legend(
            new_handles,
            new_labels,
            loc='upper center',
            bbox_to_anchor=(0.5, -0.1),
            ncol=PLOT_COUNT_PER_AGENT)

        plt.savefig(
            figure_path,
            bbox_extra_artists=(lgd,),
            bbox_inches='tight')
        print('Figure written to %s' % figure_path)
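
# A standalone sketch of the legend-interleaving index arithmetic used
# above, with PLOT_COUNT_PER_AGENT = 3 and two agents (e = exploitability,
# u = utility, o = opponent exploitability); the interleave helper is
# hypothetical and exists only to illustrate the reordering:
def interleave(items, group_size):
    # [a1, b1, c1, a2, b2, c2] -> [a1, a2, b1, b2, c1, c2]
    return [items[index]
            for offset in range(group_size)
            for index in range(offset, len(items), group_size)]

assert (interleave(['e1', 'u1', 'o1', 'e2', 'u2', 'o2'], 3)
        == ['e1', 'e2', 'u1', 'u2', 'o1', 'o2'])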
def train_and_show_results(self, test_spec):
    game = acpc.read_game_file(test_spec['game_file_path'])

    weak_opponent_samples_tree = GameTreeBuilder(
        game, SamplesTreeNodeProvider()).build_tree()
    weak_opponent_strategy_tree = GameTreeBuilder(
        game, StrategyTreeNodeProvider()).build_tree()

    def on_node(samples_node, strategy_node):
        if isinstance(samples_node, ActionNode):
            # Distribute a random number of samples among the node's actions
            child_count = len(samples_node.children)
            samples_count = random.randrange(15)
            for i, a in enumerate(samples_node.children):
                if i < (child_count - 1) and samples_count > 0:
                    action_samples_count = random.randrange(samples_count + 1)
                    samples_count -= action_samples_count
                    samples_node.action_decision_counts[a] = \
                        action_samples_count
                else:
                    samples_node.action_decision_counts[a] = samples_count
            samples_sum = np.sum(samples_node.action_decision_counts)
            if samples_sum > 0:
                strategy_node.strategy = \
                    samples_node.action_decision_counts / samples_sum
            else:
                # No samples at this node, fall back to a uniform strategy
                for a in strategy_node.children:
                    strategy_node.strategy[a] = \
                        1 / len(strategy_node.children)

    walk_trees(on_node, weak_opponent_samples_tree,
               weak_opponent_strategy_tree)
    self.assertTrue(is_correct_strategy(weak_opponent_strategy_tree))

    exploitability = Exploitability(game)

    num_test_counts = test_spec['test_counts']
    data = np.zeros([num_test_counts, 2, len(P_MAX_VALUES)])
    for i in range(num_test_counts):
        print('%s/%s' % (i + 1, num_test_counts))

        for j, p_max in enumerate(P_MAX_VALUES):
            print('Pmax: %s - %s/%s' % (p_max, j + 1, len(P_MAX_VALUES)))

            dbr = DataBiasedResponse(
                game, weak_opponent_samples_tree, p_max=p_max)
            dbr.train(test_spec['training_iterations'])

            data[i, 0, j] = exploitability.evaluate(dbr.game_tree)
            data[i, 1, j] = exploitability.evaluate(
                weak_opponent_strategy_tree, dbr.game_tree)

            # Re-plot and save after every data point so that partial
            # results are available while the test is running; finished
            # runs are plotted whole, the current run only up to j
            plt.figure(dpi=160)
            for k in range(i + 1):
                xdata = (data[k, 0, :]
                         if k < i or j == (len(P_MAX_VALUES) - 1)
                         else data[k, 0, 0:j + 1])
                ydata = (data[k, 1, :]
                         if k < i or j == (len(P_MAX_VALUES) - 1)
                         else data[k, 1, 0:j + 1])
                plt.plot(
                    xdata,
                    ydata,
                    label='Run %s' % (k + 1),
                    marker='o',
                    linewidth=0.8)
            if 'title' in test_spec:
                plt.title(test_spec['title'])
            plt.xlabel('DBR trained strategy exploitability [mbb/g]')
            plt.ylabel('Random opponent exploitation by DBR strategy [mbb/g]')
            plt.grid()
            if num_test_counts > 1:
                plt.legend()

            game_name = test_spec['game_file_path'].split('/')[1][:-5]
            figure_output_path = '%s/%s(it:%s).png' % (
                FIGURES_FOLDER, game_name, test_spec['training_iterations'])

            figures_directory = os.path.dirname(figure_output_path)
            if not os.path.exists(figures_directory):
                os.makedirs(figures_directory)

            plt.savefig(figure_output_path)

    print('\033[91mThis test needs your assistance! '
          + 'Check the generated graph %s!\033[0m' % figure_output_path)
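
# Hypothetical invocation of the DBR test above; the game file path
# follows the 'games/<name>.game' layout implied by the game_name
# parsing, and the numeric values are illustrative only:
def test_kuhn_dbr(self):
    self.train_and_show_results({
        'game_file_path': 'games/kuhn.limit.2p.game',
        'test_counts': 2,
        'training_iterations': 1000,
        'title': 'Kuhn poker DBR',
    })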