コード例 #1
0
def read_strategy_from_file(game, strategy_file_path):
    """Load a strategy from a text file.

    Each non-empty, non-comment line has the form
    ``<information_set> <p0> <p1> <p2>`` (three action probabilities).

    When ``game`` is falsy, only the raw ``{info_set: [probabilities]}``
    dict is returned. Otherwise the probabilities are also copied into a
    freshly built strategy tree for the game and ``(tree, dict)`` is
    returned. ``game`` may be either an ACPC game-file path or an already
    parsed game instance.
    """
    strategy = {}
    with open(strategy_file_path, 'r') as strategy_file:
        for raw_line in strategy_file:
            stripped = raw_line.strip()
            # Skip blank lines and '#' comments.
            if not stripped or stripped.startswith('#'):
                continue
            tokens = raw_line.split(' ')
            strategy[tokens[0]] = [float(token) for token in tokens[1:4]]

    if not game:
        return strategy

    if isinstance(game, str):
        game_instance = acpc.read_game_file(game)
    else:
        game_instance = game
    strategy_tree = GameTreeBuilder(
        game_instance, StrategyTreeNodeProvider()).build_tree()

    def apply_strategy(node):
        # Only decision nodes carry a strategy vector.
        if isinstance(node, ActionNode):
            np.copyto(node.strategy, np.array(strategy[str(node)]))

    walk_trees(apply_strategy, strategy_tree)
    return strategy_tree, strategy
コード例 #2
0
    def solve(self, strategy):
        """Compute the best response to ``strategy``.

        Builds a fresh strategy tree for the game and fills it by solving
        from each of the two player positions; the resulting tree is
        returned.
        """
        tree_builder = GameTreeBuilder(self.game, StrategyTreeNodeProvider())
        best_response = tree_builder.build_tree()

        for position in (0, 1):
            # A fresh initial state array per position, since _solve may
            # mutate it while descending the tree.
            initial_states = np.array([[strategy, 1, ()]])
            self._solve(position, best_response, initial_states, [], [])

        return best_response
コード例 #3
0
    def test_strategy_writing_and_reading(self):
        """Round-trip check: a strategy tree written to disk and read back
        must compare equal to the original."""
        game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
        strategy_tree = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()

        def fill_strategy(node):
            if not isinstance(node, ActionNode):
                return
            # Use distinctive values: 0.5 for legal actions, 7 otherwise,
            # so any serialization mix-up is detected.
            for action in range(3):
                node.strategy[action] = 0.5 if action in node.children else 7

        walk_trees(fill_strategy, strategy_tree)

        write_strategy_to_file(strategy_tree, 'test/io_test_dummy.strategy')
        loaded_tree, _ = read_strategy_from_file(KUHN_POKER_GAME_FILE_PATH, 'test/io_test_dummy.strategy')
        self.assertTrue(is_strategies_equal(strategy_tree, loaded_tree))
コード例 #4
0
    def test_leduc_rnr_works(self):
        """Smoke test: RNR training against a uniform-random opponent on
        Leduc poker runs without errors."""
        game = acpc.read_game_file(LEDUC_POKER_GAME_FILE_PATH)

        opponent_strategy = GameTreeBuilder(
            game, StrategyTreeNodeProvider()).build_tree()

        def make_uniform(node):
            if isinstance(node, ActionNode):
                # Spread probability evenly over the legal actions.
                uniform_probability = 1 / len(node.children)
                for action in node.children:
                    node.strategy[action] = uniform_probability

        walk_trees(make_uniform, opponent_strategy)

        rnr = RestrictedNashResponse(
            game, opponent_strategy, 0.5, show_progress=False)
        rnr.train(10, 5)
コード例 #5
0
def create_agent_strategy_from_trained_strategy(
        game_file_path,
        strategy_tree,
        tilt_action,
        tilt_type,
        tilt_probability,
        in_place=False):
    """Return a "tilted" variant of a trained strategy tree.

    In every node that offers ``tilt_action``, the probability of that
    action is shifted -- increased/decreased by ``tilt_probability`` for
    TiltType.ADD, or scaled by ``(1 + tilt_probability)`` for
    TiltType.MULTIPLY -- and clipped to [0, 1]. The remaining actions'
    probabilities are then adjusted so the node still sums to 1.

    Args:
        game_file_path: ACPC game definition path; only read when
            ``in_place`` is False, to build the result tree.
        strategy_tree: trained strategy tree to tilt.
        tilt_action: action to tilt; its ``.value`` is used as the index
            into ``node.strategy``.
        tilt_type: TiltType.ADD or TiltType.MULTIPLY.
        tilt_probability: magnitude of the tilt (may be negative).
        in_place: when True, ``strategy_tree`` itself is modified and
            returned; otherwise a copy is built and tilted.

    Returns:
        The tilted strategy tree.
    """

    tilt_action_index = tilt_action.value

    def on_node(node):
        # NOTE(review): unlike sibling callbacks there is no
        # isinstance(node, ActionNode) guard -- assumes every node passed by
        # walk_trees exposes `children` and `strategy`; confirm node types.
        if tilt_action_index in node.children:
            original_tilt_action_probability = node.strategy[tilt_action_index]
            new_tilt_action_probability = None
            if tilt_type == TiltType.ADD:
                new_tilt_action_probability = np.clip(original_tilt_action_probability + tilt_probability, 0, 1)
            elif tilt_type == TiltType.MULTIPLY:
                # p + p * t == p * (1 + t), clipped to a valid probability.
                new_tilt_action_probability = np.clip(
                    original_tilt_action_probability + original_tilt_action_probability * tilt_probability, 0, 1)
            node.strategy[tilt_action_index] = new_tilt_action_probability
            diff = new_tilt_action_probability - original_tilt_action_probability
            other_actions_probability = 1 - original_tilt_action_probability
            # The tilted action originally had all the mass: spread the
            # change uniformly over the remaining actions.
            # NOTE(review): exact float comparison with 0 -- a near-zero
            # remainder falls into the proportional branch below and divides
            # by a tiny number; confirm this is acceptable for the inputs.
            if diff != 0 and other_actions_probability == 0:
                other_action_probability_diff = diff / (len(node.children) - 1)
                for a in filter(lambda a: a != tilt_action_index, node.children):
                    node.strategy[a] -= other_action_probability_diff
            elif diff != 0:
                # Rescale the remaining actions proportionally to their
                # original share of the leftover probability mass.
                for a in filter(lambda a: a != tilt_action_index, node.children):
                    node.strategy[a] -= diff * (node.strategy[a] / other_actions_probability)

    result_strategy = None
    if in_place:
        result_strategy = strategy_tree
    else:
        # Build a fresh tree and copy the trained strategy into it so the
        # input tree is left untouched.
        game = acpc.read_game_file(game_file_path)
        result_strategy = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()
        copy_strategy(result_strategy, strategy_tree)

    walk_trees(on_node, result_strategy)
    return result_strategy
コード例 #6
0
 def create_strategy(self, game, node_strategy_creator_callback):
     """Build a fresh strategy tree for ``game`` and initialize each of its
     nodes by calling ``node_strategy_creator_callback`` on it."""
     tree = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()
     walk_trees(node_strategy_creator_callback, tree)
     return tree
コード例 #7
0
    def train(self, opponent_strategy, exploitability,
              max_exploitability_delta):
        """Binary-search the RNR mixing parameter ``p`` for a strategy whose
        exploitability is close to a target value.

        Args:
            opponent_strategy: strategy tree of the opponent to respond to.
            exploitability: target exploitability for the result strategy.
            max_exploitability_delta: maximum allowed absolute deviation
                from the target exploitability.

        Returns:
            Tuple ``(result_strategy, best_exploitability, p_current)``.

        Note:
            Loops until some run lands within ``max_exploitability_delta``
            of the target; there is no iteration cap.
        """
        result_strategy = GameTreeBuilder(
            self.game, StrategyTreeNodeProvider()).build_tree()
        best_exploitability = float('inf')
        best_exploitability_delta = float('inf')

        def checkpoint_callback(game_tree, checkpoint_index, iterations):
            # Ignore early checkpoints: require at least 3/4 of the run so
            # the strategy has at least partially converged.
            if iterations <= ((3 / 4) * self.iterations):
                return

            nonlocal result_strategy
            nonlocal best_exploitability_delta
            nonlocal best_exploitability

            current_exploitability = self.exp.evaluate(game_tree)
            current_exploitability_delta = abs(current_exploitability -
                                               exploitability)
            if current_exploitability_delta < best_exploitability_delta:
                # Keep a copy only when it is within the acceptable band.
                if current_exploitability_delta <= max_exploitability_delta:
                    copy_strategy(result_strategy, game_tree)
                best_exploitability_delta = current_exploitability_delta
                best_exploitability = current_exploitability

        iteration = 0
        p_low = 0
        p_high = 1

        if self.show_progress:
            print()
            print('Exploitability: %s +- %s' %
                  (exploitability, max_exploitability_delta))

        while True:
            if self.show_progress:
                iteration += 1
                print('Run %s' % iteration)
                print('Interval: %s - %s' % (p_low, p_high))
            # Probe the midpoint of the current search interval.
            p_current = p_low + (p_high - p_low) / 2
            rnr = RestrictedNashResponse(self.game,
                                         opponent_strategy,
                                         p_current,
                                         show_progress=self.show_progress)
            if self.weight_delay:
                rnr.train(self.iterations,
                          checkpoint_iterations=self.checkpoint_iterations,
                          checkpoint_callback=checkpoint_callback,
                          weight_delay=self.weight_delay)
            else:
                rnr.train(self.iterations,
                          checkpoint_iterations=self.checkpoint_iterations,
                          checkpoint_callback=checkpoint_callback)

            # BUGFIX: use <= so this termination test matches the condition
            # under which checkpoint_callback copied the strategy (it used
            # <=, while `<` here could loop forever when the delta equals
            # max_exploitability_delta exactly).
            if best_exploitability_delta <= max_exploitability_delta:
                print('Result exploitability: %s, p=%s' %
                      (best_exploitability, p_current))
                return result_strategy, best_exploitability, p_current

            if self.show_progress:
                print('Exploitability: %s, p=%s, current_delta=%s' %
                      (best_exploitability, p_current,
                       best_exploitability_delta))

            # Narrow the interval based on whether this run overshot the
            # target, then reset the per-run trackers.
            if best_exploitability > exploitability:
                p_high = p_current
            else:
                p_low = p_current
            best_exploitability = float('inf')
            best_exploitability_delta = float('inf')
コード例 #8
0
    def train_and_show_results(self, test_spec):
        """Train data-biased responses (DBR) against a randomly generated
        weak opponent and plot, for every value in P_MAX_VALUES, the DBR
        strategy's own exploitability against how much it exploits the weak
        opponent.

        The figure is saved under FIGURES_FOLDER and must be inspected
        manually (see the final printed message).
        """
        game = acpc.read_game_file(test_spec['game_file_path'])

        # Sample-count tree (simulated observations of the opponent) plus a
        # matching strategy tree derived from those counts.
        weak_opponent_samples_tree = GameTreeBuilder(
            game, SamplesTreeNodeProvider()).build_tree()
        weak_opponent_strategy_tree = GameTreeBuilder(
            game, StrategyTreeNodeProvider()).build_tree()

        def on_node(samples_node, strategy_node):
            # Distribute a random total of up to 14 decision samples over
            # the node's actions, then normalize the counts into a strategy.
            if isinstance(samples_node, ActionNode):
                child_count = len(samples_node.children)
                samples_count = random.randrange(15)
                for i, a in enumerate(samples_node.children):
                    if i < (child_count - 1) and samples_count > 0:
                        action_samples_count = random.randrange(samples_count +
                                                                1)
                        samples_count -= action_samples_count
                        samples_node.action_decision_counts[
                            a] = action_samples_count
                    else:
                        # Last action (or nothing left): assign remainder.
                        samples_node.action_decision_counts[a] = samples_count
                samples_sum = np.sum(samples_node.action_decision_counts)
                if samples_sum > 0:
                    strategy_node.strategy = samples_node.action_decision_counts / samples_sum
                else:
                    # No samples at all: fall back to a uniform strategy.
                    for a in strategy_node.children:
                        strategy_node.strategy[a] = 1 / len(
                            strategy_node.children)

        walk_trees(on_node, weak_opponent_samples_tree,
                   weak_opponent_strategy_tree)

        self.assertTrue(is_correct_strategy(weak_opponent_strategy_tree))

        exploitability = Exploitability(game)
        num_test_counts = test_spec['test_counts']
        # data[run, 0, j]: exploitability of the DBR strategy at P_MAX_VALUES[j]
        # data[run, 1, j]: exploitation of the weak opponent by that strategy
        data = np.zeros([num_test_counts, 2, len(P_MAX_VALUES)])
        for i in range(num_test_counts):
            print('%s/%s' % (i + 1, num_test_counts))

            for j, p_max in enumerate(P_MAX_VALUES):
                print('Pmax: %s - %s/%s' % (p_max, j + 1, len(P_MAX_VALUES)))

                dbr = DataBiasedResponse(game,
                                         weak_opponent_samples_tree,
                                         p_max=p_max)
                dbr.train(test_spec['training_iterations'])

                data[i, 0, j] = exploitability.evaluate(dbr.game_tree)
                data[i, 1,
                     j] = exploitability.evaluate(weak_opponent_strategy_tree,
                                                  dbr.game_tree)

                # Re-plot all completed runs; the in-progress run is
                # truncated to the p_max values computed so far.
                plt.figure(dpi=160)
                for k in range(i + 1):
                    # NOTE(review): floor(k / 2) makes two consecutive runs
                    # share a legend label -- looks like it should simply be
                    # k; confirm intent.
                    run_index = math.floor(k / 2)
                    xdata = data[k,
                                 0, :] if k < i or j == (len(P_MAX_VALUES) -
                                                         1) else data[k, 0,
                                                                      0:j + 1]
                    ydata = data[k,
                                 1, :] if k < i or j == (len(P_MAX_VALUES) -
                                                         1) else data[k, 1,
                                                                      0:j + 1]
                    plt.plot(xdata,
                             ydata,
                             label='Run %s' % (run_index + 1),
                             marker='o',
                             linewidth=0.8)

                if 'title' in test_spec:
                    plt.title(test_spec['title'])
                plt.xlabel('DBR trained strategy exploitability [mbb/g]')
                plt.ylabel(
                    'Random opponent exploitation by DBR strategy [mbb/g]')
                plt.grid()
                if num_test_counts > 1:
                    plt.legend()

                # Figure name from the game file name: strip the directory
                # prefix and the last 5 characters (presumably the ".game"
                # extension -- TODO confirm).
                game_name = test_spec['game_file_path'].split('/')[1][:-5]
                figure_output_path = '%s/%s(it:%s).png' % (
                    FIGURES_FOLDER, game_name,
                    test_spec['training_iterations'])

                figures_directory = os.path.dirname(figure_output_path)
                if not os.path.exists(figures_directory):
                    os.makedirs(figures_directory)

                plt.savefig(figure_output_path)

        # Red console text: the generated plot needs manual inspection.
        print('\033[91mThis test needs your assistance! ' +
              'Check the generated graph %s!\033[0m' % figure_output_path)
コード例 #9
0
    def train_and_show_results(self, test_spec):
        """Run CFR on the given game, record exploitability at every
        checkpoint, plot the convergence curve, and write the final strategy
        to disk.
        """
        game = acpc.read_game_file(test_spec['game_file_path'])

        exploitability = Exploitability(game)

        # Per-checkpoint records of iteration count and exploitability.
        iteration_counts = np.zeros(0)
        exploitability_values = np.zeros([1, 0])
        best_exploitability = float("inf")
        best_exploitability_strategy = GameTreeBuilder(
            game, StrategyTreeNodeProvider()).build_tree()

        def checkpoint_callback(game_tree, checkpoint_index, iterations):
            nonlocal iteration_counts
            nonlocal exploitability_values
            nonlocal best_exploitability
            nonlocal best_exploitability_strategy

            iteration_counts = np.append(iteration_counts, iterations)

            if CHECK_STRATEGY_CORRECTNESS:
                self.assertTrue(is_correct_strategy(game_tree))

            exploitability_value = exploitability.evaluate(game_tree)
            exploitability_values = np.append(exploitability_values,
                                              exploitability_value)
            # Optionally keep a copy of the best strategy seen so far.
            if COLLECT_MIN_EXPLOITABILITY and exploitability_value < best_exploitability:
                best_exploitability = exploitability_value
                copy_strategy(best_exploitability_strategy, game_tree)

        cfr = Cfr(game)
        cfr.train(test_spec['training_iterations'],
                  weight_delay=test_spec['weight_delay'],
                  checkpoint_iterations=test_spec['checkpoint_iterations'],
                  checkpoint_callback=checkpoint_callback,
                  minimal_action_probability=0.00006)

        # Evaluate the final strategy against its best response.
        best_response = BestResponse(game).solve(cfr.game_tree)
        player_utilities, _ = PlayerUtility(game).evaluate(
            cfr.game_tree, best_response)
        print(player_utilities.tolist())
        print('Exploitability: %s' % exploitability.evaluate(cfr.game_tree))

        if COLLECT_MIN_EXPLOITABILITY:
            # Sanity-check that the saved best strategy's exploitability
            # matches the minimum of the recorded values.
            min_exploitability = exploitability.evaluate(
                best_exploitability_strategy)
            min_exploitability_best_response = BestResponse(game).solve(
                best_exploitability_strategy)
            min_exploitability_player_utilities, _ = PlayerUtility(
                game).evaluate(best_exploitability_strategy,
                               min_exploitability_best_response)
            self.assertEqual(min_exploitability, exploitability_values.min())
            print('Minimum exploitability: %s' % min_exploitability)
            print('Minimum exploitability player utilities: %s' %
                  min_exploitability_player_utilities.tolist())
        else:
            print('Minimum exploitability: %s' % exploitability_values.min())

        # Plot exploitability vs. training iterations.
        plt.figure(dpi=160)
        plt.plot(iteration_counts, exploitability_values, linewidth=0.8)

        plt.title(test_spec['title'])
        plt.xlabel('Training iterations')
        plt.ylabel('Strategy exploitability [mbb/g]')
        plt.grid()

        # Figure name from the game file name: strip the directory prefix
        # and the last 5 characters (presumably the ".game" extension --
        # TODO confirm).
        game_name = test_spec['game_file_path'].split('/')[1][:-5]
        figure_output_path = '%s/%s(it:%s-st:%s).png' % (
            FIGURES_FOLDER, game_name, test_spec['training_iterations'],
            test_spec['checkpoint_iterations'])

        figures_directory = os.path.dirname(figure_output_path)
        if not os.path.exists(figures_directory):
            os.makedirs(figures_directory)

        plt.savefig(figure_output_path)

        # Persist the final strategy, embedding its utility against the
        # best response as a header comment line.
        write_strategy_to_file(
            cfr.game_tree, '%s/%s(it:%s).strategy' %
            (FIGURES_FOLDER, game_name, test_spec['training_iterations']), [
                '# Game utility against best response: %s' %
                player_utilities.tolist()
            ])