def __init__(self, game, show_progress=True):
    """Build new CFR instance.

    Args:
        game (Game): ACPC game definition object.
        show_progress (bool): When True, display a tqdm progress bar
            while the game tree is being built. Falls back silently to
            no progress bar if tqdm is not available.

    Raises:
        AttributeError: If the game is not a 2-player limit game with
            at most 5 total cards.
    """
    self.game = game
    self.show_progress = show_progress

    # This CFR implementation only supports heads-up limit games.
    if game.get_num_players() != 2:
        raise AttributeError('Only games with 2 players are supported')
    if game.get_betting_type() != acpc.BettingType.LIMIT:
        raise AttributeError('No-limit betting games not supported')

    # Hole cards plus all board cards dealt by the final round.
    total_cards_count = game.get_num_hole_cards() \
        + game.get_total_num_board_cards(game.get_num_rounds() - 1)
    if total_cards_count > 5:
        raise AttributeError('Only games with up to 5 cards are supported')

    game_tree_builder = GameTreeBuilder(game, CfrNodeProvider())

    if not self.show_progress:
        self.game_tree = game_tree_builder.build_tree()
        return

    # tqdm is an optional dependency: only the tqdm() construction is
    # guarded, so a NameError raised from inside build_tree() is not
    # swallowed (the old code could silently build the tree twice).
    progress = None
    try:
        progress = tqdm(total=1)
    except NameError:
        pass

    if progress is None:
        self.game_tree = game_tree_builder.build_tree()
    else:
        with progress:
            progress.set_description('Building game tree')
            self.game_tree = game_tree_builder.build_tree()
            progress.update(1)
def solve(self, strategy):
    """Compute a best-response strategy tree against the given strategy.

    Solves for both player positions and returns the resulting tree.
    """
    tree_builder = GameTreeBuilder(self.game, StrategyTreeNodeProvider())
    best_response = tree_builder.build_tree()
    for position in (0, 1):
        # Root entry: (opponent strategy node, reach probability, cards held).
        root_entries = np.array([[strategy, 1, ()]])
        self._solve(position, best_response, root_entries, [], [])
    return best_response
def read_strategy_from_file(game, strategy_file_path):
    """Read a strategy from a strategy file.

    Each non-empty, non-comment line has the form
    ``<node-key> <p0> <p1> <p2>`` (action probabilities).

    Args:
        game: ACPC game definition object, path to a game file, or a
            falsy value. When falsy, only the raw strategy dict is
            returned; otherwise the strategy is also loaded into a
            freshly built strategy tree.
        strategy_file_path (str): Path to the strategy file.

    Returns:
        dict when ``game`` is falsy, otherwise ``(strategy_tree, dict)``.
    """
    strategy = {}
    with open(strategy_file_path, 'r') as strategy_file:
        for line in strategy_file:
            stripped = line.strip()
            if not stripped or stripped.startswith('#'):
                continue
            # split() without an argument tolerates repeated whitespace
            # and the trailing newline (split(' ') would produce empty
            # fields and crash float()).
            line_split = stripped.split()
            strategy[line_split[0]] = [
                float(prob_str) for prob_str in line_split[1:4]
            ]

    if not game:
        return strategy

    game_instance = acpc.read_game_file(game) if isinstance(game, str) else game
    strategy_tree = GameTreeBuilder(game_instance, StrategyTreeNodeProvider()).build_tree()

    def on_node(node):
        if isinstance(node, ActionNode):
            node_strategy = np.array(strategy[str(node)])
            np.copyto(node.strategy, node_strategy)

    walk_trees(on_node, strategy_tree)
    return strategy_tree, strategy
def __init__(self, game, strategies):
    """Build a uniformly weighted mixture over the given strategy trees.

    Each action node of the mixture tree gets a reference to the shared
    weight vector and a matrix of the component strategies' action
    probabilities at that node.
    """
    tree_builder = GameTreeBuilder(
        game, StrategiesWeightedMixtureTreeNodeProvider())
    self.strategy = tree_builder.build_tree()

    strategy_count = len(strategies)
    # Start from equal weights for every component strategy.
    self.weights = np.ones(strategy_count) / strategy_count

    def link_nodes(*nodes):
        mixture_node = nodes[0]
        if not isinstance(mixture_node, ActionNode):
            return
        mixture_node.weights = self.weights
        mixture_node.strategies = np.zeros([strategy_count, 3])
        for index, component_node in enumerate(nodes[1:]):
            mixture_node.strategies[index, :] = component_node.strategy

    walk_trees(link_nodes, self.strategy, *strategies)
def test_strategy_writing_and_reading(self):
    """Round-trip a strategy tree through the file format and compare."""
    game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
    strategy_tree = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()

    def fill_node(node):
        if not isinstance(node, ActionNode):
            return
        # Deliberately non-normalized values: valid actions get 0.5,
        # invalid slots a sentinel 7, to exercise exact round-tripping.
        for action in range(3):
            node.strategy[action] = 0.5 if action in node.children else 7

    walk_trees(fill_node, strategy_tree)
    write_strategy_to_file(strategy_tree, 'test/io_test_dummy.strategy')
    read_strategy_tree, _ = read_strategy_from_file(
        KUHN_POKER_GAME_FILE_PATH, 'test/io_test_dummy.strategy')
    self.assertTrue(is_strategies_equal(strategy_tree, read_strategy_tree))
def test_leduc_rnr_works(self):
    """Smoke test: RNR training runs on Leduc against a uniform opponent."""
    game = acpc.read_game_file(LEDUC_POKER_GAME_FILE_PATH)
    opponent_strategy = GameTreeBuilder(
        game, StrategyTreeNodeProvider()).build_tree()

    def make_uniform(node):
        if isinstance(node, ActionNode):
            # Spread probability evenly over the legal actions.
            uniform_probability = 1 / len(node.children)
            for action in node.children:
                node.strategy[action] = uniform_probability

    walk_trees(make_uniform, opponent_strategy)

    rnr = RestrictedNashResponse(
        game, opponent_strategy, 0.5, show_progress=False)
    rnr.train(10, 5)
def create_agent_strategy_from_trained_strategy(
        game_file_path,
        strategy_tree,
        tilt_action,
        tilt_type,
        tilt_probability,
        in_place=False):
    """Tilt a trained strategy towards (or away from) one action.

    At every node where ``tilt_action`` is legal, its probability is
    shifted by ``tilt_probability`` (additively or multiplicatively,
    per ``tilt_type``) and the remaining actions' probabilities are
    adjusted so each node still sums to 1.

    Args:
        game_file_path (str): Path to the ACPC game file (used only
            when ``in_place`` is False to build the result tree).
        strategy_tree: Trained strategy tree to tilt.
        tilt_action: Action enum whose ``.value`` indexes the strategy.
        tilt_type (TiltType): ADD or MULTIPLY.
        tilt_probability (float): Tilt amount (may be negative).
        in_place (bool): When True, modify ``strategy_tree`` directly;
            otherwise tilt a copy and leave the input untouched.

    Returns:
        The tilted strategy tree.
    """
    tilt_action_index = tilt_action.value

    def on_node(node):
        # NOTE(review): unlike sibling walkers, this one does not check
        # isinstance(node, ActionNode) — presumably walk_trees only
        # visits nodes with a `children` mapping here; confirm.
        if tilt_action_index in node.children:
            original_tilt_action_probability = node.strategy[tilt_action_index]
            new_tilt_action_probability = None
            if tilt_type == TiltType.ADD:
                new_tilt_action_probability = np.clip(
                    original_tilt_action_probability + tilt_probability, 0, 1)
            elif tilt_type == TiltType.MULTIPLY:
                # Relative tilt: p + p * tilt_probability, clamped to [0, 1].
                new_tilt_action_probability = np.clip(
                    original_tilt_action_probability
                    + original_tilt_action_probability * tilt_probability,
                    0, 1)
            node.strategy[tilt_action_index] = new_tilt_action_probability
            diff = new_tilt_action_probability - original_tilt_action_probability
            other_actions_probability = 1 - original_tilt_action_probability
            if diff != 0 and other_actions_probability == 0:
                # Other actions had zero mass: spread the correction evenly.
                other_action_probability_diff = diff / (len(node.children) - 1)
                for a in filter(lambda a: a != tilt_action_index, node.children):
                    node.strategy[a] -= other_action_probability_diff
            elif diff != 0:
                # Scale the correction proportionally to each action's share
                # of the remaining probability mass.
                for a in filter(lambda a: a != tilt_action_index, node.children):
                    node.strategy[a] -= diff * (node.strategy[a] / other_actions_probability)

    result_strategy = None
    if in_place:
        result_strategy = strategy_tree
    else:
        # Build a fresh tree and copy the trained strategy into it so the
        # caller's tree is left unmodified.
        game = acpc.read_game_file(game_file_path)
        result_strategy = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()
        copy_strategy(result_strategy, strategy_tree)

    walk_trees(on_node, result_strategy)
    return result_strategy
def read_log_file(game_file_path, log_file_path, player_names, player_trees=None):
    """Parse an ACPC match log into per-player sample trees.

    Args:
        game_file_path (str): Path to the ACPC game definition file.
        log_file_path (str): Path to the match log file.
        player_names (list): Names of the players, one per seat.
        player_trees (dict, optional): Existing sample trees keyed by
            player name; players not present get a freshly built tree.

    Returns:
        dict: Mapping from player name to its sample tree.

    Raises:
        AttributeError: If the name count does not match the game's
            player count, or the game is not a limit betting game.
    """
    game = acpc.read_game_file(game_file_path)
    num_players = game.get_num_players()
    if len(player_names) != num_players:
        raise AttributeError('Wrong number of player names provided')
    if game.get_betting_type() != acpc.BettingType.LIMIT:
        raise AttributeError('Only limit betting games are supported')

    players = {}
    for i in range(num_players):
        player_name = player_names[i]
        if player_trees and player_name in player_trees:
            player_tree = player_trees[player_name]
        else:
            player_tree = GameTreeBuilder(
                game, SamplesTreeNodeProvider()).build_tree()
        players[player_name] = player_tree

    with open(log_file_path, 'r') as log_file:
        for line in log_file:
            # Skip blanks, comments, and score lines (3 colon-separated
            # fields instead of a full state line).
            if not line.strip() or line.strip().startswith('#') or len(
                    line.split(':')) == 3:
                continue
            # Per-line seat order (was shadowing the player_names
            # parameter in the original; renamed to keep them distinct).
            line_player_names = [
                name.strip() for name in line.split(':')[-1].split('|')
            ]
            state = acpc.parse_state(game_file_path, line)
            current_player_trees = [players[name] for name in line_player_names]
            _add_state_to_sample_trees(game, state, current_player_trees, 0, 0)

    return players
def create_strategy(self, game, node_strategy_creator_callback):
    """Build a fresh strategy tree and initialize every node via the callback.

    Args:
        game: ACPC game definition object.
        node_strategy_creator_callback: Callable invoked on each node of
            the new tree to fill in its strategy.

    Returns:
        The initialized strategy tree.
    """
    new_strategy = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()
    walk_trees(node_strategy_creator_callback, new_strategy)
    return new_strategy
def train(self, opponent_strategy, exploitability, max_exploitability_delta):
    """Binary-search the RNR mixing parameter p for a target exploitability.

    Repeatedly trains a RestrictedNashResponse at the midpoint of the
    current [p_low, p_high] interval until a checkpointed strategy lands
    within ``max_exploitability_delta`` of ``exploitability``.

    Args:
        opponent_strategy: Opponent strategy tree for RNR training.
        exploitability (float): Target exploitability value.
        max_exploitability_delta (float): Acceptable distance from target.

    Returns:
        (result_strategy, best_exploitability, p_current) once a run's
        best checkpoint is within the acceptable delta.
    """
    result_strategy = GameTreeBuilder(
        self.game, StrategyTreeNodeProvider()).build_tree()
    best_exploitability = float('inf')
    best_exploitability_delta = float('inf')

    def checkpoint_callback(game_tree, checkpoint_index, iterations):
        if iterations <= ((3 / 4) * self.iterations):
            # Make sure the strategy at least partially converged
            return
        nonlocal result_strategy
        nonlocal best_exploitability_delta
        nonlocal best_exploitability
        current_exploitability = self.exp.evaluate(game_tree)
        current_exploitability_delta = abs(current_exploitability - exploitability)
        if current_exploitability_delta < best_exploitability_delta:
            # Only copy the strategy out when it is actually acceptable;
            # the best-so-far trackers update on any improvement.
            if current_exploitability_delta <= max_exploitability_delta:
                copy_strategy(result_strategy, game_tree)
            best_exploitability_delta = current_exploitability_delta
            best_exploitability = current_exploitability

    iteration = 0
    # Bisection bounds for the RNR mixing parameter p in [0, 1].
    p_low = 0
    p_high = 1
    if self.show_progress:
        print()
        print('Exploitability: %s +- %s' % (exploitability, max_exploitability_delta))
    while True:
        if self.show_progress:
            iteration += 1
            print('Run %s' % iteration)
            print('Interval: %s - %s' % (p_low, p_high))
        p_current = p_low + (p_high - p_low) / 2
        rnr = RestrictedNashResponse(
            self.game, opponent_strategy, p_current,
            show_progress=self.show_progress)
        if self.weight_delay:
            rnr.train(
                self.iterations,
                checkpoint_iterations=self.checkpoint_iterations,
                checkpoint_callback=checkpoint_callback,
                weight_delay=self.weight_delay)
        else:
            rnr.train(
                self.iterations,
                checkpoint_iterations=self.checkpoint_iterations,
                checkpoint_callback=checkpoint_callback)
        if best_exploitability_delta < max_exploitability_delta:
            # NOTE(review): this prints even when show_progress is False.
            print('Result exploitability: %s, p=%s' % (best_exploitability, p_current))
            return result_strategy, best_exploitability, p_current
        if self.show_progress:
            print('Exploitability: %s, p=%s, current_delta=%s' % (
                best_exploitability, p_current, best_exploitability_delta))
        # Higher p makes the RNR strategy more exploitable, so shrink the
        # interval towards the target from the appropriate side.
        if best_exploitability > exploitability:
            p_high = p_current
        else:
            p_low = p_current
        # Reset the trackers for the next bisection run.
        best_exploitability = float('inf')
        best_exploitability_delta = float('inf')
def train_and_show_results(self, test_spec):
    """Train DBR against a random weak opponent and plot exploitability.

    Builds a randomly sampled opponent model, trains DataBiasedResponse
    for each value in P_MAX_VALUES over several runs, and saves a graph
    of (DBR exploitability, opponent exploitation) for manual review.
    """
    game = acpc.read_game_file(test_spec['game_file_path'])

    weak_opponent_samples_tree = GameTreeBuilder(
        game, SamplesTreeNodeProvider()).build_tree()
    weak_opponent_strategy_tree = GameTreeBuilder(
        game, StrategyTreeNodeProvider()).build_tree()

    def on_node(samples_node, strategy_node):
        # Randomly distribute up to 14 decision samples over the legal
        # actions, then derive the matching opponent strategy.
        if isinstance(samples_node, ActionNode):
            child_count = len(samples_node.children)
            samples_count = random.randrange(15)
            for i, a in enumerate(samples_node.children):
                if i < (child_count - 1) and samples_count > 0:
                    action_samples_count = random.randrange(samples_count + 1)
                    samples_count -= action_samples_count
                    samples_node.action_decision_counts[
                        a] = action_samples_count
                else:
                    # Last action (or no budget left) takes the remainder.
                    samples_node.action_decision_counts[a] = samples_count
            samples_sum = np.sum(samples_node.action_decision_counts)
            if samples_sum > 0:
                strategy_node.strategy = samples_node.action_decision_counts / samples_sum
            else:
                # No samples at all: fall back to a uniform strategy.
                for a in strategy_node.children:
                    strategy_node.strategy[a] = 1 / len(
                        strategy_node.children)

    walk_trees(on_node, weak_opponent_samples_tree, weak_opponent_strategy_tree)
    self.assertTrue(is_correct_strategy(weak_opponent_strategy_tree))

    exploitability = Exploitability(game)

    num_test_counts = test_spec['test_counts']
    # data[run, 0, j] = DBR exploitability; data[run, 1, j] = opponent
    # exploitation, for the j-th p_max value.
    data = np.zeros([num_test_counts, 2, len(P_MAX_VALUES)])
    for i in range(num_test_counts):
        print('%s/%s' % (i + 1, num_test_counts))
        for j, p_max in enumerate(P_MAX_VALUES):
            print('Pmax: %s - %s/%s' % (p_max, j + 1, len(P_MAX_VALUES)))

            dbr = DataBiasedResponse(game, weak_opponent_samples_tree, p_max=p_max)
            dbr.train(test_spec['training_iterations'])

            data[i, 0, j] = exploitability.evaluate(dbr.game_tree)
            data[i, 1, j] = exploitability.evaluate(weak_opponent_strategy_tree, dbr.game_tree)

            # Re-plot all data collected so far after every training so a
            # partial figure exists even if the test is interrupted.
            plt.figure(dpi=160)
            for k in range(i + 1):
                # NOTE(review): floor(k / 2) halves the run label for
                # consecutive runs — looks like it may be a leftover;
                # confirm intended labeling.
                run_index = math.floor(k / 2)
                # Completed runs plot all points; the current run only
                # plots the p_max values measured so far.
                xdata = data[k, 0, :] if k < i or j == (len(P_MAX_VALUES) - 1) else data[k, 0, 0:j + 1]
                ydata = data[k, 1, :] if k < i or j == (len(P_MAX_VALUES) - 1) else data[k, 1, 0:j + 1]
                plt.plot(
                    xdata,
                    ydata,
                    label='Run %s' % (run_index + 1),
                    marker='o',
                    linewidth=0.8)

            if 'title' in test_spec:
                plt.title(test_spec['title'])
            plt.xlabel('DBR trained strategy exploitability [mbb/g]')
            plt.ylabel(
                'Random opponent exploitation by DBR strategy [mbb/g]')
            plt.grid()
            if num_test_counts > 1:
                plt.legend()

            # Derive the figure name from the game file name (strip
            # directory prefix and the '.game' extension).
            game_name = test_spec['game_file_path'].split('/')[1][:-5]
            figure_output_path = '%s/%s(it:%s).png' % (
                FIGURES_FOLDER, game_name, test_spec['training_iterations'])

            figures_directory = os.path.dirname(figure_output_path)
            if not os.path.exists(figures_directory):
                os.makedirs(figures_directory)

            plt.savefig(figure_output_path)

    print('\033[91mThis test needs your assistance! ' +
          'Check the generated graph %s!\033[0m' % figure_output_path)
def train_and_show_results(self, test_spec):
    """Train CFR on the given game and plot exploitability over iterations.

    Checkpoints record exploitability during training; the resulting
    curve and the final strategy are written to the figures folder.
    """
    game = acpc.read_game_file(test_spec['game_file_path'])

    exploitability = Exploitability(game)

    # Parallel arrays: iteration count and exploitability per checkpoint.
    iteration_counts = np.zeros(0)
    exploitability_values = np.zeros([1, 0])
    best_exploitability = float("inf")
    best_exploitability_strategy = GameTreeBuilder(
        game, StrategyTreeNodeProvider()).build_tree()

    def checkpoint_callback(game_tree, checkpoint_index, iterations):
        nonlocal iteration_counts
        nonlocal exploitability_values
        nonlocal best_exploitability
        nonlocal best_exploitability_strategy

        iteration_counts = np.append(iteration_counts, iterations)
        if CHECK_STRATEGY_CORRECTNESS:
            self.assertTrue(is_correct_strategy(game_tree))
        exploitability_value = exploitability.evaluate(game_tree)
        exploitability_values = np.append(exploitability_values, exploitability_value)
        # Keep a copy of the least-exploitable strategy seen so far.
        if COLLECT_MIN_EXPLOITABILITY and exploitability_value < best_exploitability:
            best_exploitability = exploitability_value
            copy_strategy(best_exploitability_strategy, game_tree)

    cfr = Cfr(game)
    cfr.train(
        test_spec['training_iterations'],
        weight_delay=test_spec['weight_delay'],
        checkpoint_iterations=test_spec['checkpoint_iterations'],
        checkpoint_callback=checkpoint_callback,
        minimal_action_probability=0.00006)

    best_response = BestResponse(game).solve(cfr.game_tree)
    player_utilities, _ = PlayerUtility(game).evaluate(
        cfr.game_tree, best_response)
    print(player_utilities.tolist())
    print('Exploitability: %s' % exploitability.evaluate(cfr.game_tree))

    if COLLECT_MIN_EXPLOITABILITY:
        # Sanity-check that the copied best strategy matches the minimum
        # of the recorded checkpoint values.
        min_exploitability = exploitability.evaluate(
            best_exploitability_strategy)
        min_exploitability_best_response = BestResponse(game).solve(
            best_exploitability_strategy)
        min_exploitability_player_utilities, _ = PlayerUtility(
            game).evaluate(best_exploitability_strategy,
                           min_exploitability_best_response)
        self.assertEqual(min_exploitability, exploitability_values.min())
        print('Minimum exploitability: %s' % min_exploitability)
        print('Minimum exploitability player utilities: %s' %
              min_exploitability_player_utilities.tolist())
    else:
        print('Minimum exploitability: %s' % exploitability_values.min())

    plt.figure(dpi=160)
    plt.plot(iteration_counts, exploitability_values, linewidth=0.8)
    plt.title(test_spec['title'])
    plt.xlabel('Training iterations')
    plt.ylabel('Strategy exploitability [mbb/g]')
    plt.grid()

    # Derive the figure name from the game file name (strip directory
    # prefix and the '.game' extension).
    game_name = test_spec['game_file_path'].split('/')[1][:-5]
    figure_output_path = '%s/%s(it:%s-st:%s).png' % (
        FIGURES_FOLDER, game_name, test_spec['training_iterations'],
        test_spec['checkpoint_iterations'])

    figures_directory = os.path.dirname(figure_output_path)
    if not os.path.exists(figures_directory):
        os.makedirs(figures_directory)

    plt.savefig(figure_output_path)

    write_strategy_to_file(
        cfr.game_tree,
        '%s/%s(it:%s).strategy' % (FIGURES_FOLDER, game_name,
                                   test_spec['training_iterations']),
        [
            '# Game utility against best response: %s' %
            player_utilities.tolist()
        ])