コード例 #1
0
def copy_strategy(dst, src):
    """Copy the strategy of every action node from ``src`` into ``dst``.

    Both trees are visited together through ``walk_trees``. Wherever the
    destination node is an ``ActionNode``, the values of the source node's
    strategy are written into the destination node's strategy array.

    Args:
        dst: Tree whose action-node strategies are overwritten.
        src: Tree the strategies are read from.
    """

    def copy_node_strategy(target_node, source_node):
        if isinstance(target_node, ActionNode):
            # copyto writes into the existing destination array instead of
            # rebinding the attribute.
            np.copyto(target_node.strategy, source_node.strategy)
        # The source node's children, in the iteration order of ``children``,
        # are returned to the caller (how walk_trees uses them is not
        # visible from here).
        source_children = source_node.children
        return [source_children[action] for action in source_children]

    walk_trees(copy_node_strategy, dst, src)
コード例 #2
0
def read_strategy_from_file(game, strategy_file_path):
    """Parse a strategy file and optionally load it into a strategy tree.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped. Every other line is read as a key followed by
    whitespace-separated probabilities; at most the first three values after
    the key are kept.

    Args:
        game: Falsy to get only the parsed dictionary. Otherwise either a
            ``str``, which is loaded with ``acpc.read_game_file``, or an
            object that is handed to ``GameTreeBuilder`` unchanged.
        strategy_file_path: Path of the strategy file to read.

    Returns:
        The ``{key: [float, ...]}`` dictionary when ``game`` is falsy,
        otherwise the tuple ``(strategy_tree, strategy)`` where the tree's
        action nodes have been filled from the dictionary.

    Raises:
        ValueError: If a probability token cannot be converted to ``float``.
        KeyError: If the dictionary has no entry for ``str(node)`` of an
            action node in the built tree.
    """
    strategy = {}
    with open(strategy_file_path, 'r') as strategy_file:
        for line in strategy_file:
            stripped_line = line.strip()
            if not stripped_line or stripped_line.startswith('#'):
                continue
            # split() without a separator collapses runs of spaces/tabs, so
            # indented lines or doubled separators no longer yield empty
            # tokens (which used to break float() or produce an empty key).
            key, *probabilities = stripped_line.split()
            strategy[key] = [float(value) for value in probabilities[:3]]

    if not game:
        return strategy

    game_instance = acpc.read_game_file(game) if isinstance(game,
                                                            str) else game
    strategy_tree = GameTreeBuilder(game_instance,
                                    StrategyTreeNodeProvider()).build_tree()

    def on_node(node):
        if isinstance(node, ActionNode):
            # Entries are looked up by the node's string representation.
            node_strategy = np.array(strategy[str(node)])
            np.copyto(node.strategy, node_strategy)

    walk_trees(on_node, strategy_tree)
    return strategy_tree, strategy
コード例 #3
0
def is_strategies_equal(first, second):
    """Tell whether two trees hold matching strategies in their action nodes.

    The trees are visited together through ``walk_trees``. For every pair
    whose first node is an ``ActionNode`` the strategy entries at indices 0,
    1 and 2 are compared with ``isclose``.

    Args:
        first: First tree to compare.
        second: Second tree to compare.

    Returns:
        ``True`` if no compared entry differed, ``False`` otherwise.
    """
    mismatch_found = False

    def compare_nodes(node_a, node_b):
        nonlocal mismatch_found
        if not isinstance(node_a, ActionNode):
            return
        # All three entries are always examined, as no early exit is taken.
        differing_actions = [
            action for action in range(3)
            if not isclose(node_a.strategy[action], node_b.strategy[action])
        ]
        if differing_actions:
            mismatch_found = True

    walk_trees(compare_nodes, first, second)
    return not mismatch_found
コード例 #4
0
    def __init__(self, game, strategies):
        """Create a mixture tree that combines ``strategies`` with equal weights.

        A tree is built for ``game`` with
        ``StrategiesWeightedMixtureTreeNodeProvider`` and stored in
        ``self.strategy``. ``self.weights`` starts as a uniform vector with
        one entry per strategy. Each action node of the mixture tree gets a
        reference to that weight vector and a ``(len(strategies), 3)`` array
        holding the corresponding node strategy of every input tree.

        Args:
            game: Game passed to ``GameTreeBuilder``.
            strategies: Sequence of strategy trees to mix.
        """
        self.strategy = GameTreeBuilder(
            game, StrategiesWeightedMixtureTreeNodeProvider()).build_tree()
        strategy_count = len(strategies)
        self.weights = np.ones(strategy_count) / strategy_count

        def fill_mixture_node(mixture_node, *strategy_nodes):
            if not isinstance(mixture_node, ActionNode):
                return
            # The same weights array object is shared by all mixture nodes.
            mixture_node.weights = self.weights
            stacked = np.zeros([strategy_count, 3])
            mixture_node.strategies = stacked
            for row, strategy_node in enumerate(strategy_nodes):
                stacked[row, :] = strategy_node.strategy

        walk_trees(fill_mixture_node, self.strategy, *strategies)
コード例 #5
0
def is_correct_strategy(strategy_tree):
    """Validate the strategies stored in the action nodes of a tree.

    An action node fails the check when its strategy does not sum to 1
    (compared with ``isclose``) or when one of the indices 0, 1, 2 is not a
    key of ``node.children`` but still has a non-zero strategy entry.

    Args:
        strategy_tree: Tree to validate.

    Returns:
        ``True`` if no action node failed, ``False`` otherwise.
    """
    violation_found = False

    def check_node(node):
        nonlocal violation_found
        if not isinstance(node, ActionNode):
            return
        if not isclose(np.sum(node.strategy), 1):
            violation_found = True
        for action in range(3):
            # Actions without a child must carry no probability.
            if action not in node.children and node.strategy[action] != 0:
                violation_found = True

    walk_trees(check_node, strategy_tree)
    return not violation_found
コード例 #6
0
    def test_strategy_writing_and_reading(self):
        """Write a strategy tree to a file, read it back and compare both."""
        dummy_strategy_path = 'test/io_test_dummy.strategy'
        game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
        original_tree = GameTreeBuilder(
            game, StrategyTreeNodeProvider()).build_tree()

        def fill_node(node):
            if not isinstance(node, ActionNode):
                return
            for action in range(3):
                # Existing children get 0.5; indices without a child get the
                # marker value 7.
                node.strategy[action] = 0.5 if action in node.children else 7

        walk_trees(fill_node, original_tree)

        write_strategy_to_file(original_tree, dummy_strategy_path)
        loaded_tree, _ = read_strategy_from_file(
            KUHN_POKER_GAME_FILE_PATH, dummy_strategy_path)
        self.assertTrue(is_strategies_equal(original_tree, loaded_tree))
コード例 #7
0
    def __init__(
            self,
            game,
            opponent_strategy_tree,
            p,
            show_progress=True):
        """Store ``p`` and index the opponent's strategies by node string.

        Args:
            game: Forwarded to the parent initializer.
            opponent_strategy_tree: Tree whose action nodes provide the
                opponent strategy.
            p: Stored unchanged in ``self.p``.
            show_progress: Forwarded to the parent initializer.
        """
        super().__init__(game, show_progress)
        self.p = p

        strategy_by_node = {}

        def record_strategy(node):
            if not isinstance(node, ActionNode):
                return
            # The node's strategy object itself is stored, not a copy.
            strategy_by_node[str(node)] = node.strategy

        walk_trees(record_strategy, opponent_strategy_tree)
        self.opponent_strategy = strategy_by_node
コード例 #8
0
    def __init__(self,
                 game,
                 opponent_sample_tree,
                 p_max=0.8,
                 show_progress=True):
        """Store ``p_max`` and index the opponent's sample counts by node string.

        Args:
            game: Forwarded to the parent initializer.
            opponent_sample_tree: Tree whose action nodes provide
                ``action_decision_counts``.
            p_max: Stored unchanged in ``self.p_max``.
            show_progress: Forwarded to the parent initializer.
        """
        super().__init__(game, show_progress)
        self.p_max = p_max

        counts_by_node = {}

        def record_counts(node):
            if not isinstance(node, ActionNode):
                return
            # The node's counts object itself is stored, not a copy.
            counts_by_node[str(node)] = node.action_decision_counts

        walk_trees(record_counts, opponent_sample_tree)
        self.opponent_action_decision_counts = counts_by_node
コード例 #9
0
    def test_leduc_rnr_works(self):
        """Train a restricted Nash response against a uniform opponent.

        The test only checks that construction and training run through; it
        makes no assertion on the result.
        """
        game = acpc.read_game_file(LEDUC_POKER_GAME_FILE_PATH)

        tree_builder = GameTreeBuilder(game, StrategyTreeNodeProvider())
        opponent_strategy = tree_builder.build_tree()

        def make_uniform(node):
            if not isinstance(node, ActionNode):
                return
            # Every available action gets the same probability.
            uniform_probability = 1 / len(node.children)
            for action in node.children:
                node.strategy[action] = uniform_probability

        walk_trees(make_uniform, opponent_strategy)

        rnr = RestrictedNashResponse(
            game, opponent_strategy, 0.5, show_progress=False)
        rnr.train(10, 5)
コード例 #10
0
def create_agent_strategy_from_trained_strategy(
        game_file_path,
        strategy_tree,
        tilt_action,
        tilt_type,
        tilt_probability,
        in_place=False):
    """Shift probability towards or away from one action in a strategy tree.

    For every visited node whose ``children`` contain ``tilt_action.value``,
    the probability of that action is changed and the difference is taken
    from the node's other available actions.

    Args:
        game_file_path: Game file used to build a fresh tree; only read when
            ``in_place`` is false.
        strategy_tree: Trained strategy tree that is tilted or copied from.
        tilt_action: Object whose ``value`` is the index of the tilted action.
        tilt_type: ``TiltType.ADD`` adds ``tilt_probability`` to the action's
            probability; ``TiltType.MULTIPLY`` adds the probability times
            ``tilt_probability``. Either result is clipped to ``[0, 1]``.
        tilt_probability: Amount of the tilt.
        in_place: When true ``strategy_tree`` itself is modified, otherwise a
            new tree is built and the strategy copied into it first.

    Returns:
        The tilted strategy tree.
    """
    tilted_action = tilt_action.value

    def tilt_node(node):
        if tilted_action not in node.children:
            return

        old_probability = node.strategy[tilted_action]
        if tilt_type == TiltType.ADD:
            new_probability = np.clip(
                old_probability + tilt_probability, 0, 1)
        elif tilt_type == TiltType.MULTIPLY:
            new_probability = np.clip(
                old_probability + old_probability * tilt_probability, 0, 1)
        else:
            new_probability = None
        node.strategy[tilted_action] = new_probability

        change = new_probability - old_probability
        if change == 0:
            # Nothing moved, so the other actions stay untouched.
            return

        remaining_probability = 1 - old_probability
        other_actions = [
            action for action in node.children if action != tilted_action
        ]
        if remaining_probability == 0:
            # The other actions had no probability mass to scale, so the
            # change is split evenly between them.
            equal_share = change / (len(node.children) - 1)
            for action in other_actions:
                node.strategy[action] -= equal_share
        else:
            # Each other action gives up a part proportional to its share of
            # the remaining probability.
            for action in other_actions:
                node.strategy[action] -= change * (
                    node.strategy[action] / remaining_probability)

    if in_place:
        tilted_tree = strategy_tree
    else:
        tilted_tree = GameTreeBuilder(
            acpc.read_game_file(game_file_path),
            StrategyTreeNodeProvider()).build_tree()
        copy_strategy(tilted_tree, strategy_tree)

    walk_trees(tilt_node, tilted_tree)
    return tilted_tree
コード例 #11
0
 def create_strategy(self, game, node_strategy_creator_callback):
     """Build a strategy tree for ``game`` and run a callback over it.

     Args:
         game: Game passed to ``GameTreeBuilder``.
         node_strategy_creator_callback: Callable handed to ``walk_trees``
             together with the new tree.

     Returns:
         The tree after ``walk_trees`` has finished.
     """
     tree_builder = GameTreeBuilder(game, StrategyTreeNodeProvider())
     strategy_tree = tree_builder.build_tree()
     walk_trees(node_strategy_creator_callback, strategy_tree)
     return strategy_tree
コード例 #12
0
    def train_and_show_results(self, test_spec):
        """Train DBR against a randomly sampled opponent and plot the results.

        A random opponent is generated as a samples tree plus the matching
        strategy tree. For every run and every value in ``P_MAX_VALUES`` a
        ``DataBiasedResponse`` is trained, two ``Exploitability`` evaluations
        are recorded, and the figure file is rewritten with all data
        collected so far.

        Args:
            test_spec: Dictionary read with the keys ``'game_file_path'``,
                ``'test_counts'``, ``'training_iterations'`` and, optionally,
                ``'title'``.
        """
        game = acpc.read_game_file(test_spec['game_file_path'])

        weak_opponent_samples_tree = GameTreeBuilder(
            game, SamplesTreeNodeProvider()).build_tree()
        weak_opponent_strategy_tree = GameTreeBuilder(
            game, StrategyTreeNodeProvider()).build_tree()

        def on_node(samples_node, strategy_node):
            if isinstance(samples_node, ActionNode):
                child_count = len(samples_node.children)
                # Total number of observations to distribute at this node.
                samples_count = random.randrange(15)
                for i, a in enumerate(samples_node.children):
                    if i < (child_count - 1) and samples_count > 0:
                        action_samples_count = random.randrange(
                            samples_count + 1)
                        samples_count -= action_samples_count
                        samples_node.action_decision_counts[
                            a] = action_samples_count
                    else:
                        # The last action, or any action once nothing is
                        # left, receives what remains.
                        samples_node.action_decision_counts[a] = samples_count
                samples_sum = np.sum(samples_node.action_decision_counts)
                if samples_sum > 0:
                    strategy_node.strategy = (
                        samples_node.action_decision_counts / samples_sum)
                else:
                    # Without any sample, fall back to a uniform strategy.
                    for a in strategy_node.children:
                        strategy_node.strategy[a] = 1 / len(
                            strategy_node.children)

        walk_trees(on_node, weak_opponent_samples_tree,
                   weak_opponent_strategy_tree)

        self.assertTrue(is_correct_strategy(weak_opponent_strategy_tree))

        # The output path depends only on test_spec, so it is computed once.
        # This also keeps the final message valid when no iteration runs.
        game_name = test_spec['game_file_path'].split('/')[1][:-5]
        figure_output_path = '%s/%s(it:%s).png' % (
            FIGURES_FOLDER, game_name,
            test_spec['training_iterations'])
        figures_directory = os.path.dirname(figure_output_path)

        exploitability = Exploitability(game)
        num_test_counts = test_spec['test_counts']
        last_p_max_index = len(P_MAX_VALUES) - 1
        data = np.zeros([num_test_counts, 2, len(P_MAX_VALUES)])
        for i in range(num_test_counts):
            print('%s/%s' % (i + 1, num_test_counts))

            for j, p_max in enumerate(P_MAX_VALUES):
                print('Pmax: %s - %s/%s' % (p_max, j + 1, len(P_MAX_VALUES)))

                dbr = DataBiasedResponse(game,
                                         weak_opponent_samples_tree,
                                         p_max=p_max)
                dbr.train(test_spec['training_iterations'])

                data[i, 0, j] = exploitability.evaluate(dbr.game_tree)
                data[i, 1, j] = exploitability.evaluate(
                    weak_opponent_strategy_tree, dbr.game_tree)

                figure = plt.figure(dpi=160)
                for k in range(i + 1):
                    # NOTE(review): floor(k / 2) gives two consecutive runs
                    # the same label; kept as is, confirm whether k + 1 was
                    # intended.
                    run_index = math.floor(k / 2)
                    # Finished runs are plotted completely, the current run
                    # only up to the p_max value just evaluated.
                    if k < i or j == last_p_max_index:
                        columns = slice(None)
                    else:
                        columns = slice(0, j + 1)
                    plt.plot(data[k, 0, columns],
                             data[k, 1, columns],
                             label='Run %s' % (run_index + 1),
                             marker='o',
                             linewidth=0.8)

                if 'title' in test_spec:
                    plt.title(test_spec['title'])
                plt.xlabel('DBR trained strategy exploitability [mbb/g]')
                plt.ylabel(
                    'Random opponent exploitation by DBR strategy [mbb/g]')
                plt.grid()
                if num_test_counts > 1:
                    plt.legend()

                os.makedirs(figures_directory, exist_ok=True)
                plt.savefig(figure_output_path)
                # A new figure is created on every iteration; close it so
                # open figures do not pile up during long runs.
                plt.close(figure)

        print('\033[91mThis test needs your assistance! ' +
              'Check the generated graph %s!\033[0m' % figure_output_path)