def __init__(
            self,
            game_file_path,
            portfolio_strategy_files_paths,
            exp3g_gamma=0.02,
            exp3g_eta=0.025,
            utility_estimator_class=SimpleUtilityEstimator,
            utility_estimator_args=None):
        super().__init__()
        self.portfolio_size = len(portfolio_strategy_files_paths)
        self.bandit_algorithm = Exp3G(exp3g_gamma, exp3g_eta, self.portfolio_size)
        game = acpc.read_game_file(game_file_path)
        if utility_estimator_args is None:
            self.utility_estimator = utility_estimator_class(game, True)
        else:
            self.utility_estimator = utility_estimator_class(game, True, **utility_estimator_args)

        self.portfolio_trees = []
        self.portfolio_dicts = []
        for portfolio_strategy_file_path in portfolio_strategy_files_paths:
            strategy_tree, strategy_dict = read_strategy_from_file(game_file_path, portfolio_strategy_file_path)
            self.portfolio_trees += [strategy_tree]
            self.portfolio_dicts += [strategy_dict]

        self.portfolio_strategies_mixture = StrategiesWeightedMixture(game, self.portfolio_trees)
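# Usage sketch (illustrative, not from the source): construct the agent from a game
# file and portfolio response strategy files, then hand it to an ACPC client, as in
# Example #19 below. Paths and the port are placeholders; the acpc package alias is
# assumed to match the one used throughout these examples.
import acpc_python_client as acpc

agent = ImplicitModellingAgent(
    'games/kuhn.limit.2p.game',                          # placeholder game file
    ['portfolio/a-response.strategy',                    # placeholder strategies
     'portfolio/b-response.strategy'])
client = acpc.Client('games/kuhn.limit.2p.game', '127.0.1.1', '18791')
client.play(agent)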
Example #2
def read_strategy_from_file(game, strategy_file_path):
    strategy = {}
    with open(strategy_file_path, 'r') as strategy_file:
        for line in strategy_file:
            if not line.strip() or line.strip().startswith('#'):
                continue
            line_split = line.split(' ')
            strategy[line_split[0]] = [
                float(probStr) for probStr in line_split[1:4]
            ]

    if not game:
        return strategy

    game_instance = acpc.read_game_file(game) if isinstance(game, str) else game
    strategy_tree = GameTreeBuilder(game_instance, StrategyTreeNodeProvider()).build_tree()

    def on_node(node):
        if isinstance(node, ActionNode):
            node_strategy = np.array(strategy[str(node)])
            np.copyto(node.strategy, node_strategy)

    walk_trees(on_node, strategy_tree)
    return strategy_tree, strategy
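# Usage sketch (illustrative): pass either a game file path or a parsed game object.
# With a game, both the strategy tree and the raw dict come back; with a falsy game,
# only the {info_set: probabilities} dict is returned. Paths are placeholders.
strategy_tree, strategy_dict = read_strategy_from_file(
    'games/kuhn.limit.2p.game',
    'strategies/kuhn.limit.2p-equilibrium.strategy')
raw_dict = read_strategy_from_file(
    None, 'strategies/kuhn.limit.2p-equilibrium.strategy')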
Example #3
def train_portfolio_responses(game_file_path,
                              opponent_strategy_trees,
                              rnr_params,
                              callback=None,
                              log=False,
                              parallel=False):
    num_opponents = len(opponent_strategy_trees)

    game = acpc.read_game_file(game_file_path)

    if log:
        print()

    responses = [None] * num_opponents
    params = [(i, num_opponents, log, parallel, game, rnr_params,
               opponent_strategy_trees) for i in range(num_opponents)]
    if parallel:
        with multiprocessing.Pool(max(int(multiprocessing.cpu_count() / 2),
                                      2)) as p:
            for i, result in enumerate(
                    p.imap_unordered(_train_response, params)):
                response_index, response_strategy = result
                responses[response_index] = response_strategy
                if callback:
                    callback(response_index, response_strategy)
                if log:
                    print('Progress: %s/%s' % (i + 1, num_opponents))
    else:
        for response_index, response_strategy in map(_train_response, params):
            if callback:
                callback(response_index, response_strategy)
            responses[response_index] = response_strategy

    return responses
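# Illustrative call, following the shape used in Example #4 below. The meaning of the
# 5-tuple RNR parameters is defined by _train_response (not shown here), so these
# values are copied from the test rather than documented semantics.
responses = train_portfolio_responses(
    'games/kuhn.limit.2p.game',                          # placeholder path
    opponent_strategy_trees,                             # list of opponent strategy trees
    [(100, 800, 10, 2, 2)] * len(opponent_strategy_trees),
    log=True,
    parallel=True)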
Example #4
    def test_build_portfolio_not_crashing(self):
        game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)

        def on_node_always_call(node):
            if isinstance(node, ActionNode):
                node.strategy[1] = 1

        def on_node_always_fold(node):
            if isinstance(node, ActionNode):
                if 0 in node.children:
                    node.strategy[0] = 1
                else:
                    node.strategy[1] = 1

        def on_node_uniform(node):
            if isinstance(node, ActionNode):
                action_count = len(node.children)
                action_probability = 1 / action_count
                for a in node.children:
                    node.strategy[a] = action_probability

        opponents = [
            self.create_strategy(game, on_node_always_call),
            self.create_strategy(game, on_node_always_fold),
            self.create_strategy(game, on_node_uniform)
        ]

        opponent_responses = train_portfolio_responses(
            KUHN_POKER_GAME_FILE_PATH, opponents,
            [(100, 800, 10, 2, 2)] * len(opponents))
        portfolio_strategies, opponent_indices = optimize_portfolio(
            KUHN_POKER_GAME_FILE_PATH, opponents, opponent_responses)
        self.assertGreaterEqual(len(portfolio_strategies), 1)
        self.assertEqual(len(portfolio_strategies), len(opponent_indices))
Example #5
    def test_kuhn_cfr_checkpointing(self):
        game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
        cfr = Cfr(game, show_progress=False)

        checkpoints_count = 0
        def checkpoint_callback(game_tree, checkpoint_index, iterations):
            nonlocal checkpoints_count
            self.assertTrue(game_tree is not None)
            self.assertEqual(checkpoint_index, checkpoints_count)
            checkpoints_count += 1

        cfr.train(60, weight_delay=15, checkpoint_iterations=15, checkpoint_callback=checkpoint_callback)

        self.assertEqual(checkpoints_count, 3)
Example #6
    def test_strategy_writing_and_reading(self):
        game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
        strategy_tree = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()

        def on_node(node):
            if isinstance(node, ActionNode):
                for a in range(3):
                    if a in node.children:
                        node.strategy[a] = 0.5
                    else:
                        # Deliberately bogus value for unavailable actions; it must
                        # survive the write/read round-trip checked below
                        node.strategy[a] = 7
        walk_trees(on_node, strategy_tree)

        write_strategy_to_file(strategy_tree, 'test/io_test_dummy.strategy')
        read_strategy_tree, _ = read_strategy_from_file(KUHN_POKER_GAME_FILE_PATH, 'test/io_test_dummy.strategy')
        self.assertTrue(is_strategies_equal(strategy_tree, read_strategy_tree))
Example #7
    def test_read_game_file(self):
        game = acpc.read_game_file('test.game')
        self.assertEqual(game.get_num_players(), 3)
        self.assertEqual(game.get_num_rounds(), 4)

        self.assertEqual(game.get_blind(0), 5)
        self.assertEqual(game.get_blind(1), 10)
        self.assertEqual(game.get_blind(2), 0)

        self.assertEqual(game.get_num_hole_cards(), 2)

        self.assertEqual(game.get_num_board_cards(0), 0)
        self.assertEqual(game.get_num_board_cards(1), 3)
        self.assertEqual(game.get_num_board_cards(2), 1)
        self.assertEqual(game.get_num_board_cards(3), 1)

        self.assertEqual(game.get_num_ranks(), 13)
        self.assertEqual(game.get_num_suits(), 4)
Example #8
    def train_and_show_results(self, test_spec):
        game_file_path = test_spec['game_file_path']
        game = acpc.read_game_file(game_file_path)

        base_strategy, _ = read_strategy_from_file(
            game_file_path, test_spec['base_strategy_path'])

        opponent = test_spec['opponent']
        opponent_strategy = create_agent_strategy_from_trained_strategy(
            game_file_path, base_strategy, opponent[1], opponent[2],
            opponent[3])

        strategy, exploitability, p = RnrParameterOptimizer(game).train(
            opponent_strategy, test_spec['exploitability'],
            test_spec['max_delta'])

        self.assertIsNotNone(strategy)
        self.assertTrue(is_correct_strategy(strategy))
        print('Final exploitability is %s with p of %s' % (exploitability, p))
Example #9
def create_agent_strategy(
        game_file_path,
        tilt_action,
        tilt_type,
        tilt_probability,
        cfr_iterations=2000,
        cfr_weight_delay=700,
        show_progress=True):

    game = acpc.read_game_file(game_file_path)
    cfr = Cfr(game, show_progress=show_progress)
    cfr.train(cfr_iterations, cfr_weight_delay)
    return create_agent_strategy_from_trained_strategy(
        game_file_path,
        cfr.game_tree,
        tilt_action,
        tilt_type,
        tilt_probability,
        True)
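# Illustrative call (values arbitrary): train a CFR equilibrium, then tilt its CALL
# probability up by 0.2, mirroring the tilt parameters used in Example #11 below.
tilted = create_agent_strategy(
    'games/kuhn.limit.2p.game',                          # placeholder path
    Action.CALL,
    TiltType.ADD,
    0.2,
    cfr_iterations=1000,
    cfr_weight_delay=500)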
Example #10
    def test_leduc_rnr_works(self):
        game = acpc.read_game_file(LEDUC_POKER_GAME_FILE_PATH)

        opponent_strategy = GameTreeBuilder(
            game, StrategyTreeNodeProvider()).build_tree()

        def on_node(node):
            if isinstance(node, ActionNode):
                action_count = len(node.children)
                action_probability = 1 / action_count
                for a in node.children:
                    node.strategy[a] = action_probability

        walk_trees(on_node, opponent_strategy)

        rnr = RestrictedNashResponse(game,
                                     opponent_strategy,
                                     0.5,
                                     show_progress=False)
        rnr.train(10, 5)
Example #11
    def test_kuhn_action_minus_tilted_agent(self):
        kuhn_equilibrium, _ = read_strategy_from_file(
            KUHN_POKER_GAME_FILE_PATH,
            'strategies/kuhn.limit.2p-equilibrium.strategy')

        game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
        exploitability = Exploitability(game)

        tilted_agent_strategy = create_agent_strategy_from_trained_strategy(
            KUHN_POKER_GAME_FILE_PATH, kuhn_equilibrium, Action.CALL,
            TiltType.ADD, -0.5)
        self.assertTrue(is_correct_strategy(tilted_agent_strategy))
        self.assertTrue(
            not is_strategies_equal(kuhn_equilibrium, tilted_agent_strategy))

        equilibrium_exploitability = exploitability.evaluate(kuhn_equilibrium)
        call_add_tilted_exploitability = exploitability.evaluate(
            tilted_agent_strategy)
        self.assertGreater(call_add_tilted_exploitability,
                           equilibrium_exploitability)
Example #12
def create_agent_strategy_from_trained_strategy(
        game_file_path,
        strategy_tree,
        tilt_action,
        tilt_type,
        tilt_probability,
        in_place=False):

    tilt_action_index = tilt_action.value

    def on_node(node):
        if isinstance(node, ActionNode) and tilt_action_index in node.children:
            original_tilt_action_probability = node.strategy[tilt_action_index]
            new_tilt_action_probability = None
            if tilt_type == TiltType.ADD:
                new_tilt_action_probability = np.clip(original_tilt_action_probability + tilt_probability, 0, 1)
            elif tilt_type == TiltType.MULTIPLY:
                new_tilt_action_probability = np.clip(
                    original_tilt_action_probability + original_tilt_action_probability * tilt_probability, 0, 1)
            node.strategy[tilt_action_index] = new_tilt_action_probability
            diff = new_tilt_action_probability - original_tilt_action_probability
            other_actions_probability = 1 - original_tilt_action_probability
            if diff != 0 and other_actions_probability == 0:
                other_action_probability_diff = diff / (len(node.children) - 1)
                for a in filter(lambda a: a != tilt_action_index, node.children):
                    node.strategy[a] -= other_action_probability_diff
            elif diff != 0:
                for a in filter(lambda a: a != tilt_action_index, node.children):
                    node.strategy[a] -= diff * (node.strategy[a] / other_actions_probability)

    result_strategy = None
    if in_place:
        result_strategy = strategy_tree
    else:
        game = acpc.read_game_file(game_file_path)
        result_strategy = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()
        copy_strategy(result_strategy, strategy_tree)

    walk_trees(on_node, result_strategy)
    return result_strategy
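# Usage mirroring Example #11 above (kuhn_equilibrium and KUHN_POKER_GAME_FILE_PATH
# come from that test): lower the equilibrium's CALL probability by 0.5, clipped to
# [0, 1], with the remaining actions renormalized proportionally.
tilted_strategy = create_agent_strategy_from_trained_strategy(
    KUHN_POKER_GAME_FILE_PATH, kuhn_equilibrium, Action.CALL, TiltType.ADD, -0.5)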
Example #13
def read_log_file(game_file_path,
                  log_file_path,
                  player_names,
                  player_trees=None):

    game = acpc.read_game_file(game_file_path)
    num_players = game.get_num_players()
    if len(player_names) != num_players:
        raise AttributeError('Wrong number of player names provided')
    if game.get_betting_type() != acpc.BettingType.LIMIT:
        raise AttributeError('Only limit betting games are supported')

    players = {}
    for i in range(num_players):
        player_name = player_names[i]
        player_tree = None
        if player_trees and player_name in player_trees:
            player_tree = player_trees[player_name]
        else:
            player_tree = GameTreeBuilder(
                game, SamplesTreeNodeProvider()).build_tree()
        players[player_name] = player_tree

    with open(log_file_path, 'r') as log_file:
        for line in log_file:
            if not line.strip() or line.strip().startswith('#') or len(line.split(':')) == 3:
                continue
            line_player_names = [
                name.strip() for name in line.split(':')[-1].split('|')
            ]
            state = acpc.parse_state(game_file_path, line)

            current_player_trees = [players[name] for name in line_player_names]
            _add_state_to_sample_trees(game, state, current_player_trees, 0, 0)

    return players
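# Hedged usage sketch: build per-player sample trees from an ACPC match log.
# Paths and player names are placeholders; player_names must match the game's
# player count, and only limit games are accepted.
players = read_log_file(
    'games/kuhn.limit.2p.game',
    'logs/match.log',
    ['agent', 'opponent'])
agent_samples_tree = players['agent']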
Example #14
    def train_and_show_results(self, test_spec):
        game_file_path = test_spec['game_file_path']
        portfolio_name = test_spec['portfolio_name']
        agent_specs = test_spec['opponent_tilt_types']

        if not _check_agent_names_unique(agent_specs):
            raise AttributeError(
                'Agents must be unique so that they have unique names')

        strategies_directory_base = '%s/%s' % (TEST_OUTPUT_DIRECTORY,
                                               portfolio_name)
        strategies_directory = strategies_directory_base
        if not test_spec.get('overwrite_portfolio_path', False):
            counter = 1
            while os.path.exists(strategies_directory):
                strategies_directory = '%s(%s)' % (strategies_directory_base,
                                                   counter)
                counter += 1
        if not os.path.exists(strategies_directory):
            os.makedirs(strategies_directory)

        game = acpc.read_game_file(game_file_path)
        exp = Exploitability(game)

        # Delete results since they will be generated again
        for file in os.listdir(strategies_directory):
            absolute_path = '/'.join([strategies_directory, file])
            if os.path.isfile(absolute_path):
                os.remove(absolute_path)

        base_strategy, _ = read_strategy_from_file(
            game_file_path, test_spec['base_strategy_path'])

        num_opponents = len(agent_specs)
        opponents = []
        for agent in agent_specs:
            opponent_strategy = create_agent_strategy_from_trained_strategy(
                game_file_path, base_strategy, agent[0], agent[1], agent[2])
            opponents += [opponent_strategy]

        parallel = test_spec.get('parallel', False)

        response_paths = [
            '%s/responses/%s-response.strategy' %
            (strategies_directory, _get_agent_name(agent))
            for agent in agent_specs
        ]

        opponent_responses = [None] * num_opponents
        responses_to_train_indices = []
        responses_to_train_opponents = []
        responses_to_train_params = []
        for i in range(num_opponents):
            if os.path.exists(response_paths[i]):
                response_strategy, _ = read_strategy_from_file(
                    game_file_path, response_paths[i])
                opponent_responses[i] = response_strategy
            else:
                responses_to_train_indices += [i]
                responses_to_train_opponents += [opponents[i]]
                responses_to_train_params += [agent_specs[i][3]]

        def on_response_trained(response_index, response_strategy):
            output_file_path = response_paths[
                responses_to_train_indices[response_index]]
            output_file_dir = os.path.dirname(output_file_path)
            if not os.path.exists(output_file_dir):
                os.makedirs(output_file_dir)

            opponent_strategy = responses_to_train_opponents[response_index]
            opponent_exploitability = exp.evaluate(opponent_strategy)
            response_exploitability = exp.evaluate(response_strategy)
            response_utility_vs_opponent = exp.evaluate(
                opponent_strategy, response_strategy)

            write_strategy_to_file(response_strategy, output_file_path, [
                'Opponent exploitability: %s' % opponent_exploitability,
                'Response exploitability: %s' % response_exploitability,
                'Response value vs opponent: %s' %
                response_utility_vs_opponent,
            ])

        print('%s responses need to be trained' %
              len(responses_to_train_opponents))

        responses_to_train_strategies = train_portfolio_responses(
            game_file_path,
            responses_to_train_opponents,
            responses_to_train_params,
            log=True,
            parallel=parallel,
            callback=on_response_trained)

        for i, j in enumerate(responses_to_train_indices):
            opponent_responses[j] = responses_to_train_strategies[i]

        optimize_kwargs = {}
        if 'portfolio_cut_improvement_threshold' in test_spec:
            optimize_kwargs['portfolio_cut_improvement_threshold'] = \
                test_spec['portfolio_cut_improvement_threshold']
        portfolio_strategies, response_indices = optimize_portfolio(
            game_file_path,
            opponents,
            opponent_responses,
            log=True,
            output_directory=strategies_directory,
            **optimize_kwargs)

        portfolio_size = len(portfolio_strategies)

        agent_names = [
            _get_agent_name(agent)
            for agent in np.take(agent_specs, response_indices, axis=0)
        ]

        print()
        for a in agent_specs:
            print(_get_agent_name(a))

        response_strategy_file_names = []
        for i, strategy in enumerate(portfolio_strategies):
            agent_name = agent_names[i]

            opponent_strategy = opponents[response_indices[i]]
            opponent_exploitability = exp.evaluate(opponent_strategy)
            response_exploitability = exp.evaluate(strategy)
            response_utility_vs_opponent = exp.evaluate(
                opponent_strategy, strategy)

            # Save portfolio response strategy
            response_strategy_output_file_path = '%s/%s-response.strategy' % (
                strategies_directory, agent_name)
            response_strategy_file_names += [
                response_strategy_output_file_path.split('/')[-1]
            ]
            write_strategy_to_file(
                strategy, response_strategy_output_file_path, [
                    'Opponent exploitability: %s' % opponent_exploitability,
                    'Response exploitability: %s' % response_exploitability,
                    'Response value vs opponent: %s' %
                    response_utility_vs_opponent,
                ])

            # Save opponent strategy
            opponent_strategy_file_name = '%s-opponent.strategy' % agent_name
            opponent_strategy_output_file_path = '%s/%s' % (
                strategies_directory, opponent_strategy_file_name)
            write_strategy_to_file(opponent_strategy,
                                   opponent_strategy_output_file_path)

            # Generate opponent ACPC script
            opponent_script_path = '%s/%s.sh' % (strategies_directory,
                                                 agent_name)
            shutil.copy(BASE_OPPONENT_SCRIPT_PATH, opponent_script_path)
            _replace_in_file(
                opponent_script_path, OPPONENT_SCRIPT_REPLACE_STRINGS, [
                    WARNING_COMMENT, game_file_path,
                    opponent_strategy_output_file_path.split('/')[-1]
                ])

        for utility_estimation_method in UTILITY_ESTIMATION_METHODS:
            agent_name_method_name = '' if utility_estimation_method == UTILITY_ESTIMATION_METHODS[0] \
                else '-%s' % utility_estimation_method
            agent_script_path = '%s/%s%s.sh' % (
                strategies_directory, portfolio_name, agent_name_method_name)
            shutil.copy(BASE_AGENT_SCRIPT_PATH, agent_script_path)

            strategies_replacement = ''
            for i in range(portfolio_size):
                strategies_replacement += '        "${SCRIPT_DIR}/%s"' % response_strategy_file_names[i]
                if i < (portfolio_size - 1):
                    strategies_replacement += ' \\\n'
            _replace_in_file(agent_script_path, AGENT_SCRIPT_REPLACE_STRINGS, [
                WARNING_COMMENT, game_file_path,
                '"%s"' % utility_estimation_method, strategies_replacement
            ])
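# Shape of the test_spec consumed above, with keys inferred from this method; all
# values are illustrative placeholders. Each agent spec is a
# (tilt_action, tilt_type, tilt_probability, rnr_params) tuple.
test_spec = {
    'portfolio_name': 'kuhn-portfolio',
    'game_file_path': 'games/kuhn.limit.2p.game',
    'base_strategy_path': 'strategies/kuhn.limit.2p-equilibrium.strategy',
    'opponent_tilt_types': [
        (Action.CALL, TiltType.ADD, 0.5, (100, 800, 10, 2, 2)),
    ],
    'parallel': False,
}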
Example #15
    def run_tournament(self, test_spec):
        workspace_dir = os.getcwd()

        game_file_path = workspace_dir + '/' + test_spec['game_file_path']
        game = acpc.read_game_file(game_file_path)
        if game.get_num_players() != 2:
            raise AttributeError('Only games with 2 players are supported')

        tournament_name = test_spec['name']
        confidence = test_spec['confidence']
        max_confidence_interval_half_size = test_spec['max_confidence_interval_half_size']

        logs_base_dir = get_new_path('%s/%s/%s-%s+-%s' % (
            workspace_dir,
            FILES_PATH,
            tournament_name,
            int(confidence * 100),
            int(max_confidence_interval_half_size * 1000)))

        if not os.path.exists(logs_base_dir):
            os.makedirs(logs_base_dir)

        row_agents = test_spec['row_agents']
        row_num_agents = len(row_agents)
        row_agent_scripts_paths = [workspace_dir + '/' + agent[2] for agent in row_agents]

        column_agents = test_spec['column_agents']
        column_num_agents = len(column_agents)
        column_agent_scripts_paths = [workspace_dir + '/' + agent[2] for agent in column_agents]

        seeds = []

        seeds_file_path = '%s/%s/seeds.log' % (workspace_dir, FILES_PATH)
        if not os.path.exists(seeds_file_path):
            max_seed = (2**30) - 1
            for _ in range(5000):
                seeds += [random.randint(1, max_seed)]
            with open(seeds_file_path, 'w') as file:
                for seed in seeds:
                    file.write(str(seed) + '\n')
        else:
            with open(seeds_file_path, 'r') as seeds_file:
                for seed in seeds_file:
                    seeds += [int(float(seed))]

        scores_table = [[None for j in range(column_num_agents)] for i in range(row_num_agents)]

        agent_pairs_evaluated = []

        env = os.environ.copy()
        env['PATH'] = os.path.dirname(sys.executable) + ':' + env['PATH']

        for i in range(row_num_agents):
            for j in range(column_num_agents):
                row_agent_name = row_agents[i][0]
                column_agent_name = column_agents[j][0]
                if row_agent_name == column_agent_name:
                    continue

                agent_pair_key = tuple(sorted([row_agent_name, column_agent_name]))
                if agent_pair_key in agent_pairs_evaluated:
                    continue

                row_agent_script_path = row_agent_scripts_paths[i]
                column_agent_script_path = column_agent_scripts_paths[j]

                match_name = '%s-vs-%s' % (row_agent_name, column_agent_name)
                match_name_reversed = '%s-vs-%s' % (column_agent_name, row_agent_name)
                match_logs_dir = ('%s/%s' % (logs_base_dir, match_name)).replace('\n', '')

                print()
                print('Evaluating %s' % match_name)

                best_confidence_interval_half_size = float('inf')
                row_player_mean_utility = -1
                run_counter = 0
                log_readings = []
                while best_confidence_interval_half_size > max_confidence_interval_half_size:
                    run_counter += 1
                    run_logs_dir = '%s/run_%s' % (match_logs_dir, run_counter)
                    os.makedirs(run_logs_dir)

                    if len(seeds) < run_counter:
                        seeds += [int(datetime.now().timestamp())]
                    seed = seeds[run_counter - 1]

                    normal_order_logs_name = '%s/%s' % (run_logs_dir, match_name)
                    proc = subprocess.Popen(
                        [
                            MATCH_SCRIPT,
                            normal_order_logs_name,
                            game_file_path,
                            str(NUM_TOURNAMENT_HANDS),
                            str(seed),
                            row_agent_name,
                            row_agent_script_path,
                            column_agent_name,
                            column_agent_script_path],
                        cwd=ACPC_INFRASTRUCTURE_DIR,
                        env=env,
                        stdout=subprocess.PIPE)
                    proc.stdout.readline().decode('utf-8').strip()
                    log_readings += [get_player_utilities_from_log_file(normal_order_logs_name + '.log')]

                    reversed_order_logs_name = '%s/%s' % (run_logs_dir, match_name_reversed)
                    proc = subprocess.Popen(
                        [
                            MATCH_SCRIPT,
                            reversed_order_logs_name,
                            game_file_path,
                            str(NUM_TOURNAMENT_HANDS),
                            str(seed),
                            column_agent_name,
                            column_agent_script_path,
                            row_agent_name,
                            row_agent_script_path],
                        cwd=ACPC_INFRASTRUCTURE_DIR,
                        env=env,
                        stdout=subprocess.PIPE)
                    proc.stdout.readline().decode('utf-8').strip()
                    log_readings += [get_player_utilities_from_log_file(reversed_order_logs_name + '.log')]

                    data, player_names = get_logs_data(*log_readings)
                    means, interval_half_size, _, _ = calculate_confidence_interval(data, confidence)

                    print('Run %s, current confidence interval half size: %s' % (run_counter, interval_half_size[0]))
                    best_confidence_interval_half_size = interval_half_size[0]
                    row_player_index = player_names.index(row_agent_name)
                    row_player_mean_utility = means[row_player_index]

                scores_table[i][j] = row_player_mean_utility

                agent_pairs_evaluated += [agent_pair_key]

        print()
        print()
        scores_copy = copy.deepcopy(scores_table)
        for i in range(row_num_agents):
            scores_copy[i] = [row_agents[i][1]] + [None if score is None else score * 1000 for score in scores_copy[i]]
        column_agent_names = [agent[1] for agent in column_agents]
        avg_results_table_string = tabulate(scores_copy, headers=column_agent_names, tablefmt='grid')
        print(avg_results_table_string)

        confidence_line = 'Confidence interval: %s%% +- %s' % (int(confidence * 100), int(max_confidence_interval_half_size * 1000))
        print(confidence_line)

        with open('%s/results.log' % logs_base_dir, 'w') as file:
            file.write(avg_results_table_string)
            file.write('\n')
            file.write('All utilities in mbb/g\n')
            file.write(confidence_line)
            file.write('\n')
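# Shape of the tournament test_spec, with keys inferred from this method; values are
# illustrative placeholders. Each agent is a (name, display name, script path) tuple.
test_spec = {
    'name': 'kuhn-tournament',
    'game_file_path': 'games/kuhn.limit.2p.game',
    'confidence': 0.95,
    'max_confidence_interval_half_size': 0.01,
    'row_agents': [('agent_a', 'Agent A', 'scripts/agent_a.sh')],
    'column_agents': [('agent_b', 'Agent B', 'scripts/agent_b.sh')],
}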
Example #16
    def run_evaluation(self, test_spec):
        print()

        workspace_dir = os.getcwd()

        game_file_path = workspace_dir + '/' + test_spec['game_file_path']
        game = acpc.read_game_file(game_file_path)

        if game.get_num_players() != 2:
            raise AttributeError('Only games with 2 players are supported')

        agents = test_spec['agents']
        num_matches = test_spec['num_matches']
        num_match_hands = test_spec['num_match_hands']

        if game.get_num_players() != len(agents):
            raise AttributeError('Wrong number of players')

        game_name = game_file_path.split('/')[-1][:-len('.game')]

        test_directory = '%s/%s/test-%s-[%s]-%sx%s' % (
            workspace_dir, FILES_PATH, game_name, ';'.join(
                map(lambda a: a[0], agents)), num_matches, num_match_hands)
        test_data_directory = '%s/data' % test_directory

        force_recreate_data = test_spec.get('force_recreate_data', False)
        data_created = True
        if not force_recreate_data:
            if os.path.exists(test_directory):
                for i in range(num_matches):
                    if not os.path.exists('%s/match_%s' %
                                          (test_data_directory, i)):
                        data_created = False
                        break
            else:
                data_created = False

        if not data_created or force_recreate_data:
            if os.path.exists(test_data_directory):
                shutil.rmtree(test_data_directory)
            for i in range(num_matches):
                match_data_dir = '%s/match_%s' % (test_data_directory, i)
                if not os.path.exists(match_data_dir):
                    os.makedirs(match_data_dir)

                seed = int(datetime.now().timestamp())

                env = os.environ.copy()
                env['PATH'] = os.path.dirname(
                    sys.executable) + ':' + env['PATH']

                proc = subprocess.Popen([
                    MATCH_SCRIPT,
                    '%s/normal' % match_data_dir,
                    game_file_path,
                    str(num_match_hands),
                    str(seed),
                    agents[0][0],
                    workspace_dir + '/' + agents[0][1],
                    agents[1][0],
                    workspace_dir + '/' + agents[1][1],
                ],
                                        cwd=ACPC_INFRASTRUCTURE_DIR,
                                        env=env,
                                        stdout=subprocess.PIPE)
                proc.stdout.readline().decode('utf-8').strip()

                proc = subprocess.Popen([
                    MATCH_SCRIPT,
                    '%s/reversed' % match_data_dir,
                    game_file_path,
                    str(num_match_hands),
                    str(seed),
                    agents[1][0],
                    workspace_dir + '/' + agents[1][1],
                    agents[0][0],
                    workspace_dir + '/' + agents[0][1],
                ],
                                        cwd=ACPC_INFRASTRUCTURE_DIR,
                                        env=env,
                                        stdout=subprocess.PIPE)
                proc.stdout.readline().decode('utf-8').strip()

            print('Data created')

        log_file_paths = []
        for i in range(num_matches):
            log_file_paths += [
                '%s/match_%s/normal.log' % (test_data_directory, i),
                '%s/match_%s/reversed.log' % (test_data_directory, i),
            ]

        agent_strategies = {}
        for agent in agents:
            if len(agent) >= 3:
                strategy, _ = read_strategy_from_file(game_file_path, agent[2])
                agent_strategies[agent[0]] = strategy

        utility_estimators = test_spec['utility_estimators']
        output_table = [[None for j in range(3)]
                        for i in range(len(utility_estimators))]
        for i, utility_estimator_spec in enumerate(utility_estimators):
            utility_estimator_name = utility_estimator_spec[0]
            utility_estimator_class = utility_estimator_spec[1]
            utility_estimator_instance = None
            if utility_estimator_class is not None:
                if len(utility_estimator_spec) == 2:
                    utility_estimator_instance = utility_estimator_class(
                        game, False)
                elif len(utility_estimator_spec) > 2:
                    utility_estimator_args = utility_estimator_spec[2]
                    utility_estimator_instance = utility_estimator_class(
                        game, False, **utility_estimator_args)
            log_readings = [
                get_player_utilities_from_log_file(
                    log_file_path,
                    game_file_path=game_file_path,
                    utility_estimator=utility_estimator_instance,
                    player_strategies=agent_strategies)
                for log_file_path in log_file_paths
            ]

            data, player_names = get_logs_data(*log_readings)
            player_zero_index = player_names.index(agents[0][0])

            output_table[i][0] = utility_estimator_name
            means = np.mean(data, axis=0)
            stds = np.std(data, axis=0)

            output_table[i][1] = means[player_zero_index]
            output_table[i][2] = stds[player_zero_index]

        print()
        print(tabulate(output_table, headers=['utility estimator', 'mean', 'SD'], tablefmt='grid'))
        print()
        print('Total num hands: %s' % data.shape[0])
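# Shape of the evaluation test_spec, with keys inferred from this method; values are
# illustrative. An agent's optional third element is a strategy file used for utility
# estimation; utility estimator entries are (name, class[, kwargs]) tuples.
test_spec = {
    'game_file_path': 'games/kuhn.limit.2p.game',
    'agents': [
        ('agent_a', 'scripts/agent_a.sh', 'strategies/agent_a.strategy'),
        ('agent_b', 'scripts/agent_b.sh'),
    ],
    'num_matches': 2,
    'num_match_hands': 1000,
    'utility_estimators': [
        ('chips', None),
        ('simple', SimpleUtilityEstimator),
    ],
}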
Example #17
 def test_leduc_cfr_works(self):
     game = acpc.read_game_file(LEDUC_POKER_GAME_FILE_PATH)
     cfr = Cfr(game, show_progress=False)
     cfr.train(5, weight_delay=2)
Example #18
 def test_kuhn_bigdeck_2round_cfr_works(self):
     game = acpc.read_game_file(KUHN_BIG_DECK_2ROUND_POKER_GAME_FILE_PATH)
     cfr = Cfr(game, show_progress=False)
     cfr.train(5, weight_delay=2)
Example #19
    def evaluate_agent(self, test_spec):
        portfolio_name = test_spec['portfolio_name']
        portfolio_directory = '%s/%s' % (PORTFOLIOS_DIRECTORY, portfolio_name)

        game_file_path = test_spec['game_file_path']
        game = acpc.read_game_file(game_file_path)
        if game.get_num_players() != 2:
            raise AttributeError('Only games with 2 players are supported')

        response_strategy_paths = []
        opponent_names = []
        opponent_script_paths = []
        for file in os.listdir(portfolio_directory):
            if file.endswith('-response.strategy'):
                response_strategy_paths += [file]
            elif file.endswith('.sh') and not file.startswith(portfolio_name):
                opponent_names += [file[:-len('.sh')]]
                opponent_script_paths += [
                    '%s/%s' % (portfolio_directory, file)
                ]

        portfolio_size = len(response_strategy_paths)

        logs_dir = '/'.join([GAME_LOGS_DIRECTORY, portfolio_name])
        if os.path.exists(logs_dir):
            shutil.rmtree(logs_dir)
        os.makedirs(logs_dir)

        big_blind_size = get_big_blind_size(game)

        env = os.environ.copy()
        env['PATH'] = os.path.dirname(sys.executable) + ':' + env['PATH']

        print()
        for i in range(portfolio_size):
            opponent_name = opponent_names[i]
            logs_path = '%s/%s' % (logs_dir, opponent_name)

            proc = subprocess.Popen([
                START_DEALER_AND_OPPONENT_SCRIPT_PATH, game_file_path,
                logs_path, opponent_name, opponent_script_paths[i],
                portfolio_name
            ],
                                    env=env,
                                    stdout=subprocess.PIPE)
            port_number = proc.stdout.readline().decode('utf-8').strip()

            client = acpc.Client(game_file_path, '127.0.1.1', port_number)

            full_response_strategy_paths = [
                '%s/%s' % (portfolio_directory, s)
                for s in response_strategy_paths
            ]

            utility_estimator_args = test_spec.get('utility_estimator_args')

            client.play(
                ImplicitModellingAgent(
                    game_file_path,
                    full_response_strategy_paths,
                    utility_estimator_class=test_spec[
                        'utility_estimator_class'],
                    utility_estimator_args=utility_estimator_args))

            scores_line = proc.stdout.readline().decode('utf-8').strip()
            agent_score = float(scores_line.split(':')[1].split('|')[1])
            agent_score_mbb_per_game = (agent_score /
                                        NUM_EVAL_HANDS) * big_blind_size
            print('%s vs %s: %s' %
                  (portfolio_name, opponent_name, agent_score_mbb_per_game))
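# Shape of the test_spec consumed above, with keys inferred from this method;
# values are illustrative placeholders.
test_spec = {
    'portfolio_name': 'kuhn-portfolio',
    'game_file_path': 'games/kuhn.limit.2p.game',
    'utility_estimator_class': SimpleUtilityEstimator,
    'utility_estimator_args': None,
}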
Example #20
def optimize_portfolio(game_file_path,
                       opponent_strategies,
                       response_strategies,
                       portfolio_size=-1,
                       portfolio_cut_improvement_threshold=0.05,
                       log=False,
                       output_directory=None):

    num_opponents = len(opponent_strategies)

    if portfolio_size == num_opponents or portfolio_cut_improvement_threshold == 0:
        return response_strategies, range(num_opponents)

    game = acpc.read_game_file(game_file_path)
    exp = Exploitability(game)

    if log:
        print()

    utilities = np.zeros([num_opponents, num_opponents])
    for i in range(num_opponents):
        for j in range(num_opponents):
            utilities[i, j] = exp.evaluate(opponent_strategies[j],
                                           response_strategies[i])

    portfolio_utilities = np.zeros(num_opponents)
    response_added = np.ones(num_opponents, dtype=np.intp) * -1

    response_total_utility = np.mean(utilities, axis=1)
    best_response_index = np.argmax(response_total_utility)

    portfolio_utilities[0] = response_total_utility[best_response_index]
    response_added[0] = best_response_index

    max_utilities = np.zeros(num_opponents)
    np.copyto(max_utilities, utilities[best_response_index])

    response_available = [True] * num_opponents
    response_available[best_response_index] = False
    for i in range(1, num_opponents):
        best_portfolio_utility = None
        best_max_utilities = None
        best_response_to_add = None
        for j in range(num_opponents):
            if response_available[j]:
                new_max_utilities = np.maximum(max_utilities, utilities[j])
                new_portfolio_utility = np.mean(new_max_utilities)
                if best_portfolio_utility is None or new_portfolio_utility > best_portfolio_utility:
                    best_portfolio_utility = new_portfolio_utility
                    best_max_utilities = new_max_utilities
                    best_response_to_add = j
        response_available[best_response_to_add] = False
        max_utilities = best_max_utilities
        portfolio_utilities[i] = best_portfolio_utility
        response_added[i] = best_response_to_add

    final_portfolio_size = None

    if portfolio_size > 0:
        final_portfolio_size = portfolio_size
    else:
        min_portfolio_utility = portfolio_utilities[0]
        max_portfolio_utility = portfolio_utilities[-1]
        total_utility_improvement = max_portfolio_utility - min_portfolio_utility
        minimal_improvement = total_utility_improvement * portfolio_cut_improvement_threshold
        final_portfolio_size = 1
        for i in range(1, num_opponents):
            if (portfolio_utilities[i] -
                    portfolio_utilities[i - 1]) >= minimal_improvement:
                final_portfolio_size += 1
            else:
                break

    if log:
        print('Utilities table:')
        for i in range(num_opponents):
            print('\t'.join([str(u) for u in utilities[i]]))
        print('Response added: %s' % response_added)
        print('Final portfolio size: %s' % final_portfolio_size)

        plt.figure(dpi=160)
        plt.plot(
            np.arange(num_opponents, dtype=np.intp) + 1, portfolio_utilities)
        plt.plot(final_portfolio_size,
                 portfolio_utilities[final_portfolio_size - 1],
                 marker='o',
                 color='r')
        plt.title('Portfolio utility')
        plt.xlabel('Portfolio size')
        plt.ylabel('Portfolio value [mbb/g]')
        plt.grid()

        if output_directory:
            plt.savefig('%s/portfolio_size_utility.png' % output_directory)
        else:
            plt.show()

    portfolio_response_indices = response_added[:final_portfolio_size]
    return np.take(response_strategies,
                   portfolio_response_indices), portfolio_response_indices
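# Sketch mirroring Example #4: greedily grow the portfolio and cut it where the
# marginal utility gain drops below the threshold. Inputs are placeholders;
# `opponents` and `responses` are parallel lists of strategy trees.
portfolio, indices = optimize_portfolio(
    'games/kuhn.limit.2p.game',                          # placeholder path
    opponents,                                           # opponent strategy trees
    responses,                                           # one response per opponent
    portfolio_cut_improvement_threshold=0.05,
    log=True)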
Example #21
    def train_and_show_results(self, test_spec):
        game = acpc.read_game_file(test_spec['game_file_path'])

        exploitability = Exploitability(game)

        iteration_counts = np.zeros(0)
        exploitability_values = np.zeros([1, 0])
        best_exploitability = float("inf")
        best_exploitability_strategy = GameTreeBuilder(
            game, StrategyTreeNodeProvider()).build_tree()

        def checkpoint_callback(game_tree, checkpoint_index, iterations):
            nonlocal iteration_counts
            nonlocal exploitability_values
            nonlocal best_exploitability
            nonlocal best_exploitability_strategy

            iteration_counts = np.append(iteration_counts, iterations)

            if CHECK_STRATEGY_CORRECTNESS:
                self.assertTrue(is_correct_strategy(game_tree))

            exploitability_value = exploitability.evaluate(game_tree)
            exploitability_values = np.append(exploitability_values,
                                              exploitability_value)
            if COLLECT_MIN_EXPLOITABILITY and exploitability_value < best_exploitability:
                best_exploitability = exploitability_value
                copy_strategy(best_exploitability_strategy, game_tree)

        cfr = Cfr(game)
        cfr.train(test_spec['training_iterations'],
                  weight_delay=test_spec['weight_delay'],
                  checkpoint_iterations=test_spec['checkpoint_iterations'],
                  checkpoint_callback=checkpoint_callback,
                  minimal_action_probability=0.00006)

        best_response = BestResponse(game).solve(cfr.game_tree)
        player_utilities, _ = PlayerUtility(game).evaluate(
            cfr.game_tree, best_response)
        print(player_utilities.tolist())
        print('Exploitability: %s' % exploitability.evaluate(cfr.game_tree))

        if COLLECT_MIN_EXPLOITABILITY:
            min_exploitability = exploitability.evaluate(
                best_exploitability_strategy)
            min_exploitability_best_response = BestResponse(game).solve(
                best_exploitability_strategy)
            min_exploitability_player_utilities, _ = PlayerUtility(
                game).evaluate(best_exploitability_strategy,
                               min_exploitability_best_response)
            self.assertEqual(min_exploitability, exploitability_values.min())
            print('Minimum exploitability: %s' % min_exploitability)
            print('Minimum exploitability player utilities: %s' %
                  min_exploitability_player_utilities.tolist())
        else:
            print('Minimum exploitability: %s' % exploitability_values.min())

        plt.figure(dpi=160)
        plt.plot(iteration_counts, exploitability_values, linewidth=0.8)

        plt.title(test_spec['title'])
        plt.xlabel('Training iterations')
        plt.ylabel('Strategy exploitability [mbb/g]')
        plt.grid()

        game_name = test_spec['game_file_path'].split('/')[1][:-5]
        figure_output_path = '%s/%s(it:%s-st:%s).png' % (
            FIGURES_FOLDER, game_name, test_spec['training_iterations'],
            test_spec['checkpoint_iterations'])

        figures_directory = os.path.dirname(figure_output_path)
        if not os.path.exists(figures_directory):
            os.makedirs(figures_directory)

        plt.savefig(figure_output_path)

        write_strategy_to_file(
            cfr.game_tree, '%s/%s(it:%s).strategy' %
            (FIGURES_FOLDER, game_name, test_spec['training_iterations']), [
                '# Game utility against best response: %s' %
                player_utilities.tolist()
            ])
Example #22
    def train_and_show_results(self, test_spec):
        game_file_path = test_spec['game_file_path']
        game = acpc.read_game_file(game_file_path)

        base_strategy, _ = read_strategy_from_file(
            game_file_path, test_spec['base_strategy_path'])

        agents = test_spec['opponent_tilt_types']
        num_agents = len(agents)

        game_name = game_file_path.split('/')[1][:-5]
        overwrite_figure = test_spec.get('overwrite_figure', False)
        figure_path = get_new_path(
            '%s/%s(it:%s-st:%s)' %
            (FIGURES_FOLDER, game_name, test_spec['training_iterations'],
             test_spec['checkpoint_iterations']), '.png', overwrite_figure)
        create_path_dirs(figure_path)

        exp = Exploitability(game)

        # The 700-iteration offset presumably corresponds to the CFR weight delay,
        # before which no checkpoints are produced
        checkpoints_count = math.ceil(
            (test_spec['training_iterations'] - 700) /
            test_spec['checkpoint_iterations'])
        iteration_counts = np.zeros(checkpoints_count)
        exploitability_values = np.zeros([num_agents, checkpoints_count])
        vs_opponent_utility_values = np.zeros([num_agents, checkpoints_count])
        opponent_exploitability_values = np.zeros(num_agents)
        for i, agent in enumerate(agents):
            print('%s/%s' % (i + 1, num_agents))

            opponent_strategy = create_agent_strategy_from_trained_strategy(
                game_file_path, base_strategy, agent[0], agent[1], agent[2])

            self.assertTrue(is_correct_strategy(opponent_strategy))

            if test_spec.get('print_opponent_strategies', False):
                write_strategy_to_file(
                    opponent_strategy, '%s/%s.strategy' %
                    (os.path.dirname(figure_path), get_agent_name(agent)))

            if test_spec.get('print_best_responses', False):
                opponent_best_response = BestResponse(game).solve(
                    opponent_strategy)
                write_strategy_to_file(
                    opponent_best_response, '%s/%s-best_response.strategy' %
                    (os.path.dirname(figure_path), get_agent_name(agent)))

            if PLOT_OPPONENT_EXPLOITABILITY:
                opponent_exploitability = exp.evaluate(opponent_strategy)
                opponent_exploitability_values[i] = opponent_exploitability
                print('%s exploitability: %s' %
                      (get_agent_name(agent), opponent_exploitability))

            def checkpoint_callback(game_tree, checkpoint_index, iterations):
                if i == 0:
                    iteration_counts[checkpoint_index] = iterations
                self.assertTrue(is_correct_strategy(game_tree))
                exploitability_values[i, checkpoint_index] = exp.evaluate(
                    game_tree)
                vs_opponent_utility_values[i, checkpoint_index] = exp.evaluate(
                    opponent_strategy, game_tree)

            rnr = RestrictedNashResponse(game, opponent_strategy, agent[3])
            rnr.train(test_spec['training_iterations'],
                      checkpoint_iterations=test_spec['checkpoint_iterations'],
                      checkpoint_callback=checkpoint_callback)

            if test_spec.get('print_response_strategies', False):
                write_strategy_to_file(
                    rnr.game_tree,
                    '%s-%s-p=%s.strategy' % (figure_path[:-len('.png')],
                                             get_agent_name(agent), agent[3]))

            print('Vs opponent value: %s' %
                  exp.evaluate(opponent_strategy, rnr.game_tree))
            print('Exploitability: %s' % exp.evaluate(rnr.game_tree))

            plt.figure(dpi=300)
            ax = plt.subplot(111)
            for j in range(i + 1):
                p = plt.plot(iteration_counts,
                             exploitability_values[j],
                             label='%s-p=%s exploitability' %
                             (get_agent_name(agents[j]), agents[j][3]),
                             linewidth=LINE_WIDTH)
                plt.plot(iteration_counts,
                         vs_opponent_utility_values[j],
                         '--',
                         label='Utility against opponent strategy',
                         color=p[0].get_color(),
                         linewidth=LINE_WIDTH)
                if PLOT_OPPONENT_EXPLOITABILITY:
                    plt.plot(iteration_counts,
                             np.ones(checkpoints_count) *
                             opponent_exploitability_values[j],
                             ':',
                             label='Opponent exploitability',
                             color=p[0].get_color(),
                             linewidth=LINE_WIDTH)

            plt.title(test_spec['title'])
            plt.xlabel('Training iterations')
            plt.ylabel('Strategy exploitability [mbb/g]')
            plt.grid()
            handles, labels = ax.get_legend_handles_labels()
            new_handles = []
            new_labels = []
            for plot_index in range(PLOT_COUNT_PER_AGENT):
                for handle_index in range(plot_index, len(handles), PLOT_COUNT_PER_AGENT):
                    new_handles += [handles[handle_index]]
                    new_labels += [labels[handle_index]]
            lgd = plt.legend(new_handles,
                             new_labels,
                             loc='upper center',
                             bbox_to_anchor=(0.5, -0.1),
                             ncol=PLOT_COUNT_PER_AGENT)

            plt.savefig(figure_path,
                        bbox_extra_artists=(lgd, ),
                        bbox_inches='tight')

        print('Figure written to %s' % figure_path)
Example #23
    def train_and_show_results(self, test_spec):
        game = acpc.read_game_file(test_spec['game_file_path'])

        weak_opponent_samples_tree = GameTreeBuilder(
            game, SamplesTreeNodeProvider()).build_tree()
        weak_opponent_strategy_tree = GameTreeBuilder(
            game, StrategyTreeNodeProvider()).build_tree()

        def on_node(samples_node, strategy_node):
            if isinstance(samples_node, ActionNode):
                child_count = len(samples_node.children)
                samples_count = random.randrange(15)
                for i, a in enumerate(samples_node.children):
                    if i < (child_count - 1) and samples_count > 0:
                        action_samples_count = random.randrange(samples_count +
                                                                1)
                        samples_count -= action_samples_count
                        samples_node.action_decision_counts[
                            a] = action_samples_count
                    else:
                        samples_node.action_decision_counts[a] = samples_count
                samples_sum = np.sum(samples_node.action_decision_counts)
                if samples_sum > 0:
                    strategy_node.strategy = samples_node.action_decision_counts / samples_sum
                else:
                    for a in strategy_node.children:
                        strategy_node.strategy[a] = 1 / len(
                            strategy_node.children)

        walk_trees(on_node, weak_opponent_samples_tree,
                   weak_opponent_strategy_tree)

        self.assertTrue(is_correct_strategy(weak_opponent_strategy_tree))

        exploitability = Exploitability(game)
        num_test_counts = test_spec['test_counts']
        data = np.zeros([num_test_counts, 2, len(P_MAX_VALUES)])
        for i in range(num_test_counts):
            print('%s/%s' % (i + 1, num_test_counts))

            for j, p_max in enumerate(P_MAX_VALUES):
                print('Pmax: %s - %s/%s' % (p_max, j + 1, len(P_MAX_VALUES)))

                dbr = DataBiasedResponse(game,
                                         weak_opponent_samples_tree,
                                         p_max=p_max)
                dbr.train(test_spec['training_iterations'])

                data[i, 0, j] = exploitability.evaluate(dbr.game_tree)
                data[i, 1,
                     j] = exploitability.evaluate(weak_opponent_strategy_tree,
                                                  dbr.game_tree)

                plt.figure(dpi=160)
                for k in range(i + 1):
                    run_index = math.floor(k / 2)
                    xdata = data[k, 0, :] if k < i or j == (len(P_MAX_VALUES) - 1) \
                        else data[k, 0, 0:j + 1]
                    ydata = data[k, 1, :] if k < i or j == (len(P_MAX_VALUES) - 1) \
                        else data[k, 1, 0:j + 1]
                    plt.plot(xdata,
                             ydata,
                             label='Run %s' % (run_index + 1),
                             marker='o',
                             linewidth=0.8)

                if 'title' in test_spec:
                    plt.title(test_spec['title'])
                plt.xlabel('DBR trained strategy exploitability [mbb/g]')
                plt.ylabel(
                    'Random opponent exploitation by DBR strategy [mbb/g]')
                plt.grid()
                if num_test_counts > 1:
                    plt.legend()

                game_name = test_spec['game_file_path'].split('/')[1][:-5]
                figure_output_path = '%s/%s(it:%s).png' % (
                    FIGURES_FOLDER, game_name,
                    test_spec['training_iterations'])

                figures_directory = os.path.dirname(figure_output_path)
                if not os.path.exists(figures_directory):
                    os.makedirs(figures_directory)

                plt.savefig(figure_output_path)

        print('\033[91mThis test needs your assistance! ' +
              'Check the generated graph %s!\033[0m' % figure_output_path)
Example #24
    def create_agents_and_plot_exploitabilities(self, test_spec):
        base_strategy, _ = read_strategy_from_file(
            test_spec['game_file_path'],
            test_spec['base_strategy_path'])

        game = acpc.read_game_file(test_spec['game_file_path'])
        exploitability = Exploitability(game)

        plot_equilibrium = test_spec.get('plot_equilibrium', True)
        if plot_equilibrium:
            equilibrium_exploitability = exploitability.evaluate(base_strategy)

        tilt_probabilities = test_spec['tilt_probabilities']
        exploitability_values = np.zeros([len(TILT_TYPES), len(tilt_probabilities)])

        plot_exploitabilities = test_spec.get('plot_exploitabilities', True)
        if plot_exploitabilities:
            for i, tilt_type in enumerate(TILT_TYPES):
                for j, tilt_probability in enumerate(tilt_probabilities):
                    tilted_agent = create_agent_strategy_from_trained_strategy(
                        test_spec['game_file_path'],
                        base_strategy,
                        tilt_type[1],
                        tilt_type[2],
                        tilt_probability)
                    exploitability_values[i, j] = exploitability.evaluate(tilted_agent)

                plt.figure(dpi=160)
                for k in range(i + 1):
                    plt.plot(
                        tilt_probabilities,
                        exploitability_values[k],
                        label=TILT_TYPES[k][0],
                        linewidth=0.8)

                if plot_equilibrium:
                    plt.plot(
                        tilt_probabilities,
                        [equilibrium_exploitability] * len(tilt_probabilities),
                        'r--',
                        label='Equilibrium',
                        linewidth=1.5)

                # plt.title(test_spec['title'])
                plt.xlabel('Tilt amount')
                plt.ylabel('Agent exploitability [mbb/g]')
                plt.grid()
                plt.legend()

                figure_output_path = '%s/%s.png' % (FIGURES_FOLDER, test_spec['figure_filename'])

                figures_directory = os.path.dirname(figure_output_path)
                if not os.path.exists(figures_directory):
                    os.makedirs(figures_directory)

                plt.savefig(figure_output_path)

        plot_agent_comparison = test_spec.get('plot_agent_comparison', False)
        if plot_agent_comparison:
            agents_strategies = []
            agent_names = []
            for i, tilt_type in enumerate(TILT_TYPES):
                for j, tilt_probability in enumerate(tilt_probabilities):
                    agent_names += ['%s %s %s' % (str(tilt_type[1]).split('.')[1], str(tilt_type[2]).split('.')[1], tilt_probability)]
                    agents_strategies += [create_agent_strategy_from_trained_strategy(
                        test_spec['game_file_path'],
                        base_strategy,
                        tilt_type[1],
                        tilt_type[2],
                        tilt_probability)]

            num_agents = len(agent_names)
            scores_table = np.zeros([num_agents, num_agents])

            num_comparisons = num_agents * (num_agents + 1) // 2

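            # The matchup is zero-sum, so only the upper triangle (including
            # the diagonal) is evaluated; the mirrored cell is its negation.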
            with tqdm(total=num_comparisons) as pbar:
                for i in range(num_agents):
                    for j in range(i, num_agents):
                        scores_table[i, j] = exploitability.evaluate(agents_strategies[j], agents_strategies[i])
                        scores_table[j, i] = -scores_table[i, j]
                        pbar.update(1)

            max_score = scores_table.max()
            min_score = scores_table.min()

            # plt.figure(dpi=160)
            fig, ax = plt.subplots()

            cax = plt.imshow(scores_table, cmap=plt.cm.RdYlGn)
            plt.xticks(np.arange(num_agents), agent_names)
            plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha="right", rotation_mode="anchor")
            plt.yticks(np.arange(num_agents), agent_names)
            # plt.yticks(rotation=35)

            # plt.tick_params(
            #     axis='x',
            #     which='both',
            #     bottom=False,
            #     top=False,
            #     labelbottom=False)

            cbar = fig.colorbar(cax, ticks=[min_score, 0, max_score])
            cbar.ax.set_yticklabels([round(min_score), '0', round(max_score)])

            plt.tight_layout()
            plt.gcf().subplots_adjust(left=0.1)

            figure_output_path = '%s/%s-comparison.png' % (FIGURES_FOLDER, test_spec['figure_filename'])

            figures_directory = os.path.dirname(figure_output_path)
            if not os.path.exists(figures_directory):
                os.makedirs(figures_directory)

            plt.savefig(figure_output_path, dpi=160)
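
Since the matchup is zero-sum, the comparison loop above evaluates only the upper triangle of the score table and mirrors it with a sign flip. A minimal standalone sketch of that pattern, where evaluate(opponent, agent) is a stand-in assumed to return the agent's utility against the opponent (matching how exploitability.evaluate is called above):

import numpy as np

def cross_play_table(agents, evaluate):
    n = len(agents)
    scores = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            # Row i's score against column j; the opposite cell is its negation.
            scores[i, j] = evaluate(agents[j], agents[i])
            scores[j, i] = -scores[i, j]
    return scores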
Example #25
0
def _write_strategy(strategy_tree, iterations, output_path):
    # (The start of this example is missing from the source: the code that
    # builds `strategy_file_lines` from the strategy tree is not shown, and
    # the try/tqdm wrapper below is reconstructed from the parallel writing
    # block further down; its description text is assumed.)
    try:
        with tqdm(total=1) as progress:
            progress.set_description('Sorting strategy file lines')
            strategy_file_lines_sorted = sorted(strategy_file_lines)
            progress.update(1)
    except NameError:
        strategy_file_lines_sorted = sorted(strategy_file_lines)

    strategy_file_lines_sorted = (['#  Training iterations: %s\n' % iterations]
                                  + strategy_file_lines_sorted)

    try:
        with tqdm(total=1) as progress:
            progress.set_description('Writing strategy file')
            _write_to_output_file(output_path, strategy_file_lines_sorted)
            progress.update(1)
    except NameError:
        _write_to_output_file(output_path, strategy_file_lines_sorted)
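
The try/except NameError pairs above make the progress bar optional: when tqdm was never imported, the name lookup fails and the plain fallback runs instead. The same pattern as a reusable sketch (a hypothetical helper, not from the original code):

def with_optional_progress(description, work):
    try:
        with tqdm(total=1) as progress:  # raises NameError if tqdm is absent
            progress.set_description(description)
            result = work()
            progress.update(1)
    except NameError:
        result = work()
    return result

For example, with_optional_progress('Writing strategy file', lambda: _write_to_output_file(output_path, strategy_file_lines_sorted)) reproduces the second block above.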


if __name__ == "__main__":
    if len(sys.argv) < 4:
        print("Usage {game_file_path} {iterations} {strategy_output_path}")
        sys.exit(1)

    iterations = int(sys.argv[2])
    output_path = sys.argv[3]
    game = acpc.read_game_file(sys.argv[1])

    cfr = Cfr(game)
    cfr.train(iterations)

    _write_strategy(cfr.game_tree, iterations, output_path)
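
Because _write_strategy prepends a training-iterations header, the count can be recovered from a saved strategy file later; a minimal sketch (hypothetical helper, not in the original code):

def read_training_iterations(strategy_file_path):
    # Scans for the '#  Training iterations: N' header that _write_strategy
    # prepends to the file and returns N, or None if no header is found.
    with open(strategy_file_path, 'r') as strategy_file:
        for line in strategy_file:
            if line.startswith('#') and 'Training iterations:' in line:
                return int(line.split(':')[1])
    return None
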
    def run_evaluation(self, test_spec):
        print()

        workspace_dir = os.getcwd()

        game_file_path = workspace_dir + '/' + test_spec['game_file_path']
        game = acpc.read_game_file(game_file_path)

        if game.get_num_players() != 2:
            raise AttributeError('Only games with 2 players are supported')

        test_name = test_spec['test_name']
        base_agent = test_spec['base_agent']
        validation_agents = test_spec['validation_agents']
        num_matches = test_spec['num_matches']
        num_match_hands = test_spec['num_match_hands']

        game_name = game_file_path.split('/')[-1][:-len('.game')]

        validation_agent_names = [
            _get_agent_name(agent) for agent in validation_agents
        ]

        test_directory = '%s/%s/%s' % (workspace_dir, FILES_PATH, test_name)
        agents_data_directories = []
        for validation_agent in validation_agents:
            agent_data_dir = '%s/%s-[%s;%s]-%sx%s' % (
                test_directory, game_name, base_agent[0],
                _get_agent_name(validation_agent), num_matches,
                num_match_hands)
            agents_data_directories += [agent_data_dir]

        force_recreate_data = test_spec.get('force_recreate_data', False)

        base_validation_agent_strategy = None

        validation_agent_strategies = []

        for x, validation_agent in enumerate(validation_agents):
            agent_data_directory = agents_data_directories[x]
            data_created = True
            if not force_recreate_data:
                if os.path.exists(agent_data_directory):
                    for i in range(num_matches):
                        match_dir = '%s/match_%s' % (agent_data_directory, i)
                        if not os.path.exists(match_dir) or not os.listdir(match_dir):
                            data_created = False
                            break
                else:
                    data_created = False

            if base_validation_agent_strategy is None:
                base_validation_agent_strategy, _ = read_strategy_from_file(
                    game_file_path,
                    test_spec['base_validation_agents_strategy_path'])

            validation_agent_strategy = create_agent_strategy_from_trained_strategy(
                game_file_path, base_validation_agent_strategy,
                validation_agent[0], validation_agent[1], validation_agent[2])

            validation_agent_strategies += [validation_agent_strategy]

            if not data_created or force_recreate_data:
                if os.path.exists(agent_data_directory):
                    shutil.rmtree(agent_data_directory)

                validation_agent_strategy_path = '%s/%s.strategy' % (
                    test_directory, _get_agent_name(validation_agent))

                write_strategy_to_file(validation_agent_strategy,
                                       validation_agent_strategy_path)

                for i in range(num_matches):
                    match_data_dir = '%s/match_%s' % (agent_data_directory, i)
                    if not os.path.exists(match_data_dir):
                        os.makedirs(match_data_dir)

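                    # The same seed drives both the normal and the reversed
                    # seat order below, so both matches are presumably dealt
                    # identical cards (a duplicate match, reducing variance).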
                    seed = int(datetime.now().timestamp())

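                    # Prepend this interpreter's directory to PATH, likely so
                    # the agents spawned by the match script use the same
                    # Python environment.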
                    env = os.environ.copy()
                    env['PATH'] = os.path.dirname(sys.executable) + ':' + env['PATH']

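                    # The match script is expected to print, on its first
                    # line of output, the ports the two agents connect to.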
                    proc = subprocess.Popen(
                        [
                            MATCH_SCRIPT,
                            '%s/normal' % match_data_dir,
                            game_file_path,
                            str(num_match_hands),
                            str(seed),
                            base_agent[0],
                            _get_agent_name(validation_agent),
                        ],
                        cwd=ACPC_INFRASTRUCTURE_DIR,
                        env=env,
                        stdout=subprocess.PIPE)
                    ports_string = proc.stdout.readline().decode('utf-8').strip()
                    ports = ports_string.split(' ')

                    args = [
                        (game_file_path, ports[0], base_agent[1]),
                        (game_file_path, ports[1],
                         validation_agent_strategy_path),
                    ]

                    with multiprocessing.Pool(2) as p:
                        p.map(_run_agent, args)

                    proc = subprocess.Popen(
                        [
                            MATCH_SCRIPT,
                            '%s/reversed' % match_data_dir,
                            game_file_path,
                            str(num_match_hands),
                            str(seed),
                            _get_agent_name(validation_agent),
                            base_agent[0],
                        ],
                        cwd=ACPC_INFRASTRUCTURE_DIR,
                        env=env,
                        stdout=subprocess.PIPE)
                    ports_string = proc.stdout.readline().decode('utf-8').strip()
                    ports = ports_string.split(' ')

                    args = [
                        (game_file_path, ports[0],
                         validation_agent_strategy_path),
                        (game_file_path, ports[1], base_agent[1]),
                    ]

                    with multiprocessing.Pool(2) as p:
                        p.map(_run_agent, args)

                print('Data created')

        output = []

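        # prin() mirrors print() while also collecting every line, so the
        # whole report can be written to a log file at the end.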
        def prin(string=''):
            nonlocal output
            output += [string]
            print(string)

        utility_estimators = test_spec['utility_estimators']

        agents_log_files_paths = []
        for agent_data_directory in agents_data_directories:
            log_file_paths = []
            for i in range(num_matches):
                log_file_paths += [
                    '%s/match_%s/normal.log' % (agent_data_directory, i),
                    '%s/match_%s/reversed.log' % (agent_data_directory, i),
                ]
            agents_log_files_paths += [log_file_paths]

        agent_strategies = {
            name: strategy
            for name, strategy in zip(validation_agent_names,
                                      validation_agent_strategies)
        }

        prin('Each cell contains the utility of the row player based on '
             'observations of the column player')
        for utility_estimator_spec in utility_estimators:
            utility_estimator_name = utility_estimator_spec[0]
            utility_estimator_class = utility_estimator_spec[1]
            utility_estimator_instance = None
            if utility_estimator_class is not None:
                if len(utility_estimator_spec) == 2:
                    utility_estimator_instance = utility_estimator_class(
                        game, False)
                elif len(utility_estimator_spec) > 2:
                    utility_estimator_args = utility_estimator_spec[2]
                    utility_estimator_instance = utility_estimator_class(
                        game, False, **utility_estimator_args)

            prin()
            prin('%s (mean | SD)' % utility_estimator_name)

            output_table = [[None for j in range(len(validation_agents) + 1)]
                            for i in range(len(validation_agents))]
            for i in range(len(validation_agents)):
                output_table[i][0] = validation_agent_names[i]
            for x in range(len(validation_agents)):
                log_readings = [
                    get_player_utilities_from_log_file(
                        log_file_path,
                        game_file_path=game_file_path,
                        utility_estimator=utility_estimator_instance,
                        player_strategies=agent_strategies,
                        evaluated_strategies=validation_agent_strategies)
                    for log_file_path in agents_log_files_paths[x]
                ]

                data, player_names = get_logs_data(*log_readings)
                means = np.mean(data, axis=0)
                stds = np.std(data, axis=0)

                player_index = player_names.index(validation_agent_names[x])
                for y in range(len(validation_agents)):
                    output_table[y][x + 1] = '%s | %s' % (
                        means[player_index][y], stds[player_index][y])

            prin(
                tabulate(output_table,
                         headers=validation_agent_names,
                         tablefmt='grid'))

        prin()
        prin('Total number of hands: %s' % data.shape[0])

        output_log_path = get_new_path(
            '%s/output-%sx%s' % (test_directory, num_matches, num_match_hands),
            '.log')
        with open(output_log_path, 'w') as file:
            for line in output:
                file.write(line + '\n')
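
For reference, the aggregation above averages per-hand utilities across every parsed log; the indexing means[player_index][y] suggests the stacked data carries one row per hand and one (player, evaluated strategy) cell per row. A sketch with hypothetical shapes (not from the original code):

import numpy as np

# Hypothetical: 10000 hands, 2 players, 3 evaluated strategies.
data = np.random.randn(10000, 2, 3)
means = np.mean(data, axis=0)  # shape: (players, evaluated strategies)
stds = np.std(data, axis=0)
print('%s | %s' % (means[0][1], stds[0][1]))  # one "mean | SD" table cell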