    def test_leduc_multiply_action_tilted_agent_not_crashing(self):
        strategy = create_agent_strategy(LEDUC_POKER_GAME_FILE_PATH,
                                         Action.FOLD,
                                         TiltType.MULTIPLY,
                                         0.1,
                                         cfr_iterations=5,
                                         cfr_weight_delay=2,
                                         show_progress=False)
        self.assertTrue(is_correct_strategy(strategy))

    def test_kuhn_action_tilted_agent_not_crashing(self):
        strategy = create_agent_strategy(KUHN_POKER_GAME_FILE_PATH,
                                         Action.RAISE,
                                         TiltType.ADD,
                                         0.2,
                                         cfr_iterations=20,
                                         cfr_weight_delay=2,
                                         show_progress=False)
        self.assertTrue(is_correct_strategy(strategy))
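
    # A minimal sketch (an assumption, not the library's implementation) of
    # what a tilt plausibly does to the tilted action's probability:
    # TiltType.ADD shifts it by a constant, TiltType.MULTIPLY scales it, and
    # the distribution is then renormalized. The helper name
    # `apply_tilt_sketch` is hypothetical.
    @staticmethod
    def apply_tilt_sketch(probabilities, action_index, tilt_type, amount):
        tilted = list(probabilities)
        if tilt_type == TiltType.ADD:
            tilted[action_index] += amount
        else:  # TiltType.MULTIPLY
            tilted[action_index] *= 1 + amount
        # Clamp negatives and renormalize so this stays a valid distribution
        tilted = [max(p, 0.0) for p in tilted]
        total = sum(tilted)
        if total == 0:
            return [1 / len(tilted)] * len(tilted)
        return [p / total for p in tilted]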
Example #4
        def checkpoint_callback(game_tree, checkpoint_index, iterations):
            nonlocal iteration_counts
            nonlocal exploitability_values
            nonlocal best_exploitability
            nonlocal best_exploitability_strategy

            iteration_counts = np.append(iteration_counts, iterations)

            if CHECK_STRATEGY_CORRECTNESS:
                self.assertTrue(is_correct_strategy(game_tree))

            exploitability_value = exploitability.evaluate(game_tree)
            exploitability_values = np.append(exploitability_values,
                                              exploitability_value)
            if COLLECT_MIN_EXPLOITABILITY and exploitability_value < best_exploitability:
                best_exploitability = exploitability_value
                copy_strategy(best_exploitability_strategy, game_tree)
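
        # How a callback like the one above is consumed (a hedged sketch:
        # the call shape mirrors RestrictedNashResponse.train used later in
        # this file; `solver`, `training_iterations` and
        # `checkpoint_iterations` stand for names from the enclosing test):
        #
        # solver.train(training_iterations,
        #              checkpoint_iterations=checkpoint_iterations,
        #              checkpoint_callback=checkpoint_callback)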
Example #5
    def train_and_show_results(self, test_spec):
        game_file_path = test_spec['game_file_path']
        game = acpc.read_game_file(game_file_path)

        base_strategy, _ = read_strategy_from_file(
            game_file_path, test_spec['base_strategy_path'])

        opponent = test_spec['opponent']
        opponent_strategy = create_agent_strategy_from_trained_strategy(
            game_file_path, base_strategy, opponent[1], opponent[2],
            opponent[3])

        strategy, exploitability, p = RnrParameterOptimizer(game).train(
            opponent_strategy, test_spec['exploitability'],
            test_spec['max_delta'])

        self.assertIsNotNone(strategy)
        self.assertTrue(is_correct_strategy(strategy))
        print('Final exploitability is %s with p of %s' % (exploitability, p))
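
    # Shape of the test_spec dict consumed above. All values here are
    # hypothetical placeholders; only the keys are taken from the method:
    EXAMPLE_RNR_TEST_SPEC = {
        'game_file_path': 'games/kuhn.limit.2p.game',
        'base_strategy_path': 'strategies/kuhn.limit.2p-equilibrium.strategy',
        'opponent': ('call-add-tilted', Action.CALL, TiltType.ADD, -0.5),
        'exploitability': 250,
        'max_delta': 5,
    }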
    def test_kuhn_action_minus_tilted_agent(self):
        kuhn_equilibrium, _ = read_strategy_from_file(
            KUHN_POKER_GAME_FILE_PATH,
            'strategies/kuhn.limit.2p-equilibrium.strategy')

        game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
        exploitability = Exploitability(game)

        tilted_agent_strategy = create_agent_strategy_from_trained_strategy(
            KUHN_POKER_GAME_FILE_PATH, kuhn_equilibrium, Action.CALL,
            TiltType.ADD, -0.5)
        self.assertTrue(is_correct_strategy(tilted_agent_strategy))
        self.assertFalse(
            is_strategies_equal(kuhn_equilibrium, tilted_agent_strategy))

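        # An exact equilibrium has (near-)zero exploitability, so tilting
        # away from it should only increase exploitability.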
        equilibrium_exploitability = exploitability.evaluate(kuhn_equilibrium)
        call_add_tilted_exploitability = exploitability.evaluate(
            tilted_agent_strategy)
        self.assertTrue(
            call_add_tilted_exploitability > equilibrium_exploitability)
    def train_and_show_results(self, test_spec):
        game_file_path = test_spec['game_file_path']
        game = acpc.read_game_file(game_file_path)

        base_strategy, _ = read_strategy_from_file(
            game_file_path, test_spec['base_strategy_path'])

        agents = test_spec['opponent_tilt_types']
        num_agents = len(agents)

        game_name = game_file_path.split('/')[1][:-5]
        overwrite_figure = test_spec.get('overwrite_figure', False)
        figure_path = get_new_path(
            '%s/%s(it:%s-st:%s)' %
            (FIGURES_FOLDER, game_name, test_spec['training_iterations'],
             test_spec['checkpoint_iterations']), '.png', overwrite_figure)
        create_path_dirs(figure_path)

        exp = Exploitability(game)

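        # Note: checkpointing apparently skips the first 700 training
        # iterations (presumably a warm-up delay), hence the offset below.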
        checkpoints_count = math.ceil(
            (test_spec['training_iterations'] - 700) /
            test_spec['checkpoint_iterations'])
        iteration_counts = np.zeros(checkpoints_count)
        exploitability_values = np.zeros([num_agents, checkpoints_count])
        vs_opponent_utility_values = np.zeros([num_agents, checkpoints_count])
        opponent_exploitability_values = np.zeros(num_agents)
        for i, agent in enumerate(agents):
            print('%s/%s' % (i + 1, num_agents))

            opponent_strategy = create_agent_strategy_from_trained_strategy(
                game_file_path, base_strategy, agent[0], agent[1], agent[2])

            self.assertTrue(is_correct_strategy(opponent_strategy))

            if test_spec.get('print_opponent_strategies'):
                write_strategy_to_file(
                    opponent_strategy, '%s/%s.strategy' %
                    (os.path.dirname(figure_path), get_agent_name(agent)))

            if test_spec.get('print_best_responses'):
                opponent_best_response = BestResponse(game).solve(
                    opponent_strategy)
                write_strategy_to_file(
                    opponent_best_response, '%s/%s-best_response.strategy' %
                    (os.path.dirname(figure_path), get_agent_name(agent)))

            if PLOT_OPPONENT_EXPLOITABILITY:
                opponent_exploitability = exp.evaluate(opponent_strategy)
                opponent_exploitability_values[i] = opponent_exploitability
                print('%s exploitability: %s' %
                      (get_agent_name(agent), opponent_exploitability))

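            # Record convergence metrics at each checkpoint; the callback
            # closes over i, the metric arrays and exp from the enclosing
            # scope.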
            def checkpoint_callback(game_tree, checkpoint_index, iterations):
                if i == 0:
                    iteration_counts[checkpoint_index] = iterations
                self.assertTrue(is_correct_strategy(game_tree))
                exploitability_values[i, checkpoint_index] = exp.evaluate(
                    game_tree)
                vs_opponent_utility_values[i, checkpoint_index] = exp.evaluate(
                    opponent_strategy, game_tree)

            rnr = RestrictedNashResponse(game, opponent_strategy, agent[3])
            rnr.train(test_spec['training_iterations'],
                      checkpoint_iterations=test_spec['checkpoint_iterations'],
                      checkpoint_callback=checkpoint_callback)

            if test_spec.get('print_response_strategies'):
                write_strategy_to_file(
                    rnr.game_tree,
                    '%s-%s-p=%s.strategy' % (figure_path[:-len('.png')],
                                             get_agent_name(agent), agent[3]))

            print('Vs opponent value: %s' %
                  exp.evaluate(opponent_strategy, rnr.game_tree))
            print('Exploitability: %s' % exp.evaluate(rnr.game_tree))

            plt.figure(dpi=300)
            ax = plt.subplot(111)
            for j in range(i + 1):
                p = plt.plot(iteration_counts,
                             exploitability_values[j],
                             label='%s-p=%s exploitability' %
                             (get_agent_name(agents[j]), agents[j][3]),
                             linewidth=LINE_WIDTH)
                plt.plot(iteration_counts,
                         vs_opponent_utility_values[j],
                         '--',
                         label='Utility against opponent strategy',
                         color=p[0].get_color(),
                         linewidth=LINE_WIDTH)
                if PLOT_OPPONENT_EXPLOITABILITY:
                    plt.plot(iteration_counts,
                             np.ones(checkpoints_count) *
                             opponent_exploitability_values[j],
                             ':',
                             label='Opponent exploitability',
                             color=p[0].get_color(),
                             linewidth=LINE_WIDTH)

            plt.title(test_spec['title'])
            plt.xlabel('Training iterations')
            plt.ylabel('Strategy exploitability [mbb/g]')
            plt.grid()
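            # Regroup legend entries by plot type (exploitability,
            # vs-opponent utility, opponent exploitability) instead of by
            # agent, so each legend column holds one plot type.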
            handles, labels = ax.get_legend_handles_labels()
            new_handles = []
            new_labels = []
            for plot_type in range(PLOT_COUNT_PER_AGENT):
                for handle_index in range(plot_type, len(handles),
                                          PLOT_COUNT_PER_AGENT):
                    new_handles += [handles[handle_index]]
                    new_labels += [labels[handle_index]]
            lgd = plt.legend(new_handles,
                             new_labels,
                             loc='upper center',
                             bbox_to_anchor=(0.5, -0.1),
                             ncol=PLOT_COUNT_PER_AGENT)

            plt.savefig(figure_path,
                        bbox_extra_artists=(lgd, ),
                        bbox_inches='tight')

        print('Figure written to %s' % figure_path)
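
    # Shape of the test_spec consumed by the method above. Values are
    # hypothetical; each opponent_tilt_types entry is an
    # (action, tilt_type, tilt_amount, p) tuple, with p forwarded to
    # RestrictedNashResponse:
    EXAMPLE_RNR_PLOT_TEST_SPEC = {
        'game_file_path': 'games/leduc.limit.2p.game',
        'base_strategy_path': 'strategies/leduc.limit.2p-equilibrium.strategy',
        'opponent_tilt_types': [
            (Action.RAISE, TiltType.ADD, 0.2, 0.5),
            (Action.FOLD, TiltType.MULTIPLY, 0.1, 0.8),
        ],
        'training_iterations': 1000,
        'checkpoint_iterations': 100,
        'title': 'RNR training against tilted Leduc opponents',
    }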
    def train_and_show_results(self, test_spec):
        game = acpc.read_game_file(test_spec['game_file_path'])

        weak_opponent_samples_tree = GameTreeBuilder(
            game, SamplesTreeNodeProvider()).build_tree()
        weak_opponent_strategy_tree = GameTreeBuilder(
            game, StrategyTreeNodeProvider()).build_tree()

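        # Fill the samples tree with random action counts and derive the
        # matching strategy: observed nodes get their empirical action
        # frequencies, unobserved nodes fall back to a uniform strategy.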
        def on_node(samples_node, strategy_node):
            if isinstance(samples_node, ActionNode):
                child_count = len(samples_node.children)
                samples_count = random.randrange(15)
                for i, a in enumerate(samples_node.children):
                    if i < (child_count - 1) and samples_count > 0:
                        action_samples_count = random.randrange(samples_count +
                                                                1)
                        samples_count -= action_samples_count
                        samples_node.action_decision_counts[
                            a] = action_samples_count
                    else:
                        samples_node.action_decision_counts[a] = samples_count
                samples_sum = np.sum(samples_node.action_decision_counts)
                if samples_sum > 0:
                    strategy_node.strategy = samples_node.action_decision_counts / samples_sum
                else:
                    for a in strategy_node.children:
                        strategy_node.strategy[a] = 1 / len(
                            strategy_node.children)

        walk_trees(on_node, weak_opponent_samples_tree,
                   weak_opponent_strategy_tree)

        self.assertTrue(is_correct_strategy(weak_opponent_strategy_tree))

        exploitability = Exploitability(game)
        num_test_counts = test_spec['test_counts']
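        # data[run, 0, :] holds the DBR strategy's exploitability per p_max;
        # data[run, 1, :] holds its exploitation of the random opponent.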
        data = np.zeros([num_test_counts, 2, len(P_MAX_VALUES)])
        for i in range(num_test_counts):
            print('%s/%s' % (i + 1, num_test_counts))

            for j, p_max in enumerate(P_MAX_VALUES):
                print('Pmax: %s - %s/%s' % (p_max, j + 1, len(P_MAX_VALUES)))

                dbr = DataBiasedResponse(game,
                                         weak_opponent_samples_tree,
                                         p_max=p_max)
                dbr.train(test_spec['training_iterations'])

                data[i, 0, j] = exploitability.evaluate(dbr.game_tree)
                data[i, 1, j] = exploitability.evaluate(
                    weak_opponent_strategy_tree, dbr.game_tree)

                plt.figure(dpi=160)
                for k in range(i + 1):
                    # Finished runs (and the current run once every p_max is
                    # done) plot all points; the in-progress run plots only
                    # the points computed so far.
                    if k < i or j == (len(P_MAX_VALUES) - 1):
                        xdata = data[k, 0, :]
                        ydata = data[k, 1, :]
                    else:
                        xdata = data[k, 0, 0:j + 1]
                        ydata = data[k, 1, 0:j + 1]
                    plt.plot(xdata,
                             ydata,
                             label='Run %s' % (k + 1),
                             marker='o',
                             linewidth=0.8)

                if 'title' in test_spec:
                    plt.title(test_spec['title'])
                plt.xlabel('DBR trained strategy exploitability [mbb/g]')
                plt.ylabel(
                    'Random opponent exploitation by DBR strategy [mbb/g]')
                plt.grid()
                if num_test_counts > 1:
                    plt.legend()

                game_name = test_spec['game_file_path'].split('/')[1][:-5]
                figure_output_path = '%s/%s(it:%s).png' % (
                    FIGURES_FOLDER, game_name,
                    test_spec['training_iterations'])

                figures_directory = os.path.dirname(figure_output_path)
                if not os.path.exists(figures_directory):
                    os.makedirs(figures_directory)

                plt.savefig(figure_output_path)

        print('\033[91mThis test needs your assistance! ' +
              'Check the generated graph %s!\033[0m' % figure_output_path)
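
    # Hypothetical driver for the method above; the spec keys are exactly
    # those read by train_and_show_results, the values are placeholders:
    def test_kuhn_dbr_example(self):
        self.train_and_show_results({
            'game_file_path': 'games/kuhn.limit.2p.game',
            'test_counts': 3,
            'training_iterations': 1000,
            'title': 'DBR on Kuhn poker',
        })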