def copy_strategy(dst, src):
    """Copy the strategy of every action node of ``src`` into ``dst``.

    Both trees are handed to ``walk_trees`` together with a per-node
    callback, so they are presumably expected to share the same structure
    (confirm against ``walk_trees``). ``dst`` is updated in place via
    ``np.copyto``; the function itself returns nothing.
    """

    def _copy_node_strategy(target_node, source_node):
        # Only nodes of type ActionNode have their strategy copied.
        if isinstance(target_node, ActionNode):
            np.copyto(target_node.strategy, source_node.strategy)
        # NOTE(review): the callback hands back the children of the source
        # node; whether ``walk_trees`` uses this value (and whether the
        # return was meant to be limited to action nodes) should be
        # confirmed against ``walk_trees``.
        source_children = source_node.children
        return [source_children[key] for key in source_children]

    walk_trees(_copy_node_strategy, dst, src)
def read_strategy_from_file(game, strategy_file_path):
    """Read a strategy file and optionally load it into a strategy tree.

    Every line that is neither blank nor a ``#`` comment is expected to hold
    a key followed by up to three probabilities, separated by whitespace.
    Fields are split on any run of whitespace, so repeated spaces, tabs and
    leading indentation are accepted.

    Args:
        game: Either a falsy value (only the raw dictionary is returned),
            a path to a game file (``str``, loaded through
            ``acpc.read_game_file``) or an already loaded game object.
        strategy_file_path: Path of the strategy file to read.

    Returns:
        The ``{key: [probabilities]}`` dictionary when ``game`` is falsy,
        otherwise the tuple ``(strategy_tree, strategy_dictionary)``.

    Raises:
        ValueError: If a probability field cannot be parsed as a float.
        KeyError: If the tree contains an action node whose ``str(node)``
            key is missing from the file.
    """
    strategy = {}
    with open(strategy_file_path, 'r') as strategy_file:
        for line in strategy_file:
            stripped_line = line.strip()
            if not stripped_line or stripped_line.startswith('#'):
                continue
            # split() without arguments collapses runs of whitespace, so no
            # empty fields end up being passed to float().
            fields = stripped_line.split()
            strategy[fields[0]] = [float(prob_str) for prob_str in fields[1:4]]

    if not game:
        return strategy

    game_instance = acpc.read_game_file(game) if isinstance(game, str) else game
    strategy_tree = GameTreeBuilder(game_instance, StrategyTreeNodeProvider()).build_tree()

    def on_node(node):
        # Action nodes are looked up in the parsed file by their string form.
        if isinstance(node, ActionNode):
            node_strategy = np.array(strategy[str(node)])
            np.copyto(node.strategy, node_strategy)

    walk_trees(on_node, strategy_tree)
    return strategy_tree, strategy
def is_strategies_equal(first, second):
    """Tell whether two strategy trees hold (approximately) equal strategies.

    The first three strategy entries of each pair of action nodes are
    compared with ``isclose``. Returns ``True`` when no pair differs and
    ``False`` otherwise.
    """
    mismatch_found = False

    def _compare_nodes(first_node, second_node):
        nonlocal mismatch_found
        # Nodes that are not action nodes are not compared.
        if not isinstance(first_node, ActionNode):
            return
        for action in range(3):
            if isclose(first_node.strategy[action], second_node.strategy[action]):
                continue
            mismatch_found = True

    walk_trees(_compare_nodes, first, second)
    return not mismatch_found
def __init__(self, game, strategies):
    """Build a mixture tree over ``strategies`` with uniform initial weights.

    A tree is built for ``game`` using
    ``StrategiesWeightedMixtureTreeNodeProvider``. Each of its action nodes
    receives a reference to the shared weight vector and a
    ``(len(strategies), 3)`` array whose i-th row is the strategy of the
    matching node in the i-th input tree.
    """
    mixture_provider = StrategiesWeightedMixtureTreeNodeProvider()
    self.strategy = GameTreeBuilder(game, mixture_provider).build_tree()
    strategy_count = len(strategies)
    self.weights = np.ones(strategy_count) / strategy_count

    def on_nodes(*nodes):
        # The first node comes from the mixture tree, the remaining ones
        # from the individual strategy trees, in the order given.
        mixture_node = nodes[0]
        if not isinstance(mixture_node, ActionNode):
            return
        mixture_node.weights = self.weights
        mixture_node.strategies = np.zeros([strategy_count, 3])
        for row, source_node in enumerate(nodes[1:]):
            mixture_node.strategies[row, :] = source_node.strategy

    walk_trees(on_nodes, self.strategy, *strategies)
def is_correct_strategy(strategy_tree):
    """Check that every action node of ``strategy_tree`` holds a valid strategy.

    An action node is valid when its strategy sums to 1 (per ``isclose``)
    and every one of the three action slots that has no child carries a
    probability of exactly 0. Returns ``True`` only if all action nodes are
    valid.
    """
    violations = []

    def _check_node(node):
        if not isinstance(node, ActionNode):
            return
        if not isclose(np.sum(node.strategy), 1):
            violations.append(True)
        for action in range(3):
            if action in node.children:
                continue
            # Actions without a child must not carry any probability.
            if node.strategy[action] != 0:
                violations.append(True)

    walk_trees(_check_node, strategy_tree)
    return not violations
def test_strategy_writing_and_reading(self):
    """Round-trip a dummy Kuhn poker strategy through the strategy file I/O."""
    dummy_strategy_path = 'test/io_test_dummy.strategy'

    game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
    node_provider = StrategyTreeNodeProvider()
    strategy_tree = GameTreeBuilder(game, node_provider).build_tree()

    def fill_dummy_strategy(node):
        if not isinstance(node, ActionNode):
            return
        # Slots with a child get 0.5, the remaining slots get the marker 7.
        for action in range(3):
            node.strategy[action] = 0.5 if action in node.children else 7

    walk_trees(fill_dummy_strategy, strategy_tree)

    write_strategy_to_file(strategy_tree, dummy_strategy_path)
    read_strategy_tree, _ = read_strategy_from_file(
        KUHN_POKER_GAME_FILE_PATH, dummy_strategy_path)
    strategies_match = is_strategies_equal(strategy_tree, read_strategy_tree)
    self.assertTrue(strategies_match)
def __init__(
        self,
        game,
        opponent_strategy_tree,
        p,
        show_progress=True):
    """Store ``p`` and index the opponent strategy by node string.

    ``game`` and ``show_progress`` are forwarded to the parent constructor.
    The strategy of every action node of ``opponent_strategy_tree`` is
    stored (by reference, not copied) in ``self.opponent_strategy`` under
    the key ``str(node)``.
    """
    super().__init__(game, show_progress)
    self.p = p

    strategy_by_node = {}

    def callback(node):
        if not isinstance(node, ActionNode):
            return
        strategy_by_node[str(node)] = node.strategy

    walk_trees(callback, opponent_strategy_tree)
    self.opponent_strategy = strategy_by_node
def __init__(
        self,
        game,
        opponent_sample_tree,
        p_max=0.8,
        show_progress=True):
    """Store ``p_max`` and index the opponent action counts by node string.

    ``game`` and ``show_progress`` are forwarded to the parent constructor.
    The ``action_decision_counts`` of every action node of
    ``opponent_sample_tree`` are stored (by reference, not copied) in
    ``self.opponent_action_decision_counts`` under the key ``str(node)``.
    """
    super().__init__(game, show_progress)
    self.p_max = p_max

    counts_by_node = {}

    def callback(node):
        if not isinstance(node, ActionNode):
            return
        counts_by_node[str(node)] = node.action_decision_counts

    walk_trees(callback, opponent_sample_tree)
    self.opponent_action_decision_counts = counts_by_node
def test_leduc_rnr_works(self):
    """Smoke test: RNR trains against a uniform opponent on Leduc poker."""
    game = acpc.read_game_file(LEDUC_POKER_GAME_FILE_PATH)

    node_provider = StrategyTreeNodeProvider()
    opponent_strategy = GameTreeBuilder(game, node_provider).build_tree()

    def make_uniform(node):
        if not isinstance(node, ActionNode):
            return
        # Spread the probability evenly over the actions that have a child.
        uniform_probability = 1 / len(node.children)
        for action in node.children:
            node.strategy[action] = uniform_probability

    walk_trees(make_uniform, opponent_strategy)

    rnr = RestrictedNashResponse(
        game, opponent_strategy, 0.5, show_progress=False)
    rnr.train(10, 5)
def create_agent_strategy_from_trained_strategy(
        game_file_path,
        strategy_tree,
        tilt_action,
        tilt_type,
        tilt_probability,
        in_place=False):
    """Create a strategy in which one action is tilted.

    In every node whose children contain ``tilt_action.value`` the
    probability of that action is changed, clipped to ``[0, 1]``:

    * ``TiltType.ADD``: ``tilt_probability`` is added to it,
    * ``TiltType.MULTIPLY``: it grows by ``tilt_probability`` times itself.

    The resulting difference is subtracted from the other actions of the
    node: in equal parts when the tilted action originally had probability
    1, otherwise in proportion to each action's current probability.

    Args:
        game_file_path: Game file used to build a fresh tree; only read
            when ``in_place`` is false.
        strategy_tree: The trained strategy to tilt.
        tilt_action: Action to tilt; its ``value`` is used as the index.
        tilt_type: ``TiltType.ADD`` or ``TiltType.MULTIPLY``.
        tilt_probability: Amount of tilt.
        in_place: When true, ``strategy_tree`` itself is modified and
            returned; otherwise a copy is built and modified.

    Returns:
        The tilted strategy tree.
    """
    tilt_action_index = tilt_action.value

    def _tilt_node(node):
        if tilt_action_index not in node.children:
            return

        old_probability = node.strategy[tilt_action_index]
        tilted_probability = None
        if tilt_type == TiltType.ADD:
            tilted_probability = np.clip(
                old_probability + tilt_probability, 0, 1)
        elif tilt_type == TiltType.MULTIPLY:
            tilted_probability = np.clip(
                old_probability + old_probability * tilt_probability, 0, 1)
        node.strategy[tilt_action_index] = tilted_probability

        diff = tilted_probability - old_probability
        if diff == 0:
            # Clipping left the probability untouched; nothing to rebalance.
            return

        remaining_probability = 1 - old_probability
        other_actions = [a for a in node.children if a != tilt_action_index]
        if remaining_probability == 0:
            # The other actions had no probability mass, so the difference
            # cannot be split proportionally; split it evenly instead.
            even_share = diff / (len(node.children) - 1)
            for action in other_actions:
                node.strategy[action] -= even_share
        else:
            for action in other_actions:
                node.strategy[action] -= diff * (
                    node.strategy[action] / remaining_probability)

    if in_place:
        result_strategy = strategy_tree
    else:
        game = acpc.read_game_file(game_file_path)
        result_strategy = GameTreeBuilder(
            game, StrategyTreeNodeProvider()).build_tree()
        copy_strategy(result_strategy, strategy_tree)

    walk_trees(_tilt_node, result_strategy)
    return result_strategy
def create_strategy(self, game, node_strategy_creator_callback):
    """Build a strategy tree for ``game`` and fill it through a callback.

    ``node_strategy_creator_callback`` is passed to ``walk_trees`` together
    with the freshly built tree; the tree is then returned.
    """
    tree_builder = GameTreeBuilder(game, StrategyTreeNodeProvider())
    built_strategy = tree_builder.build_tree()
    walk_trees(node_strategy_creator_callback, built_strategy)
    return built_strategy
def train_and_show_results(self, test_spec):
    """Train DBR against a randomly sampled opponent and plot the results.

    ``test_spec`` is a dictionary read with the keys ``'game_file_path'``,
    ``'test_counts'``, ``'training_iterations'`` and, optionally,
    ``'title'``.

    Random action counts are generated for every action node of a samples
    tree and a matching opponent strategy is derived from them. For each
    run and each value of ``P_MAX_VALUES`` a ``DataBiasedResponse`` is
    trained, two exploitability values are recorded and a figure is saved
    under ``FIGURES_FOLDER``. The figure has to be inspected manually.
    """
    game = acpc.read_game_file(test_spec['game_file_path'])

    weak_opponent_samples_tree = GameTreeBuilder(
        game, SamplesTreeNodeProvider()).build_tree()
    weak_opponent_strategy_tree = GameTreeBuilder(
        game, StrategyTreeNodeProvider()).build_tree()

    def on_node(samples_node, strategy_node):
        if isinstance(samples_node, ActionNode):
            child_count = len(samples_node.children)
            # Total number of samples (0-14) to distribute among the actions.
            samples_count = random.randrange(15)
            for i, a in enumerate(samples_node.children):
                if i < (child_count - 1) and samples_count > 0:
                    # Give this action a random share of what is still left.
                    action_samples_count = random.randrange(samples_count + 1)
                    samples_count -= action_samples_count
                    samples_node.action_decision_counts[
                        a] = action_samples_count
                else:
                    # Last action (or nothing left): assign the remainder.
                    samples_node.action_decision_counts[a] = samples_count
            samples_sum = np.sum(samples_node.action_decision_counts)
            if samples_sum > 0:
                # Opponent strategy is the normalized sample frequencies.
                strategy_node.strategy = samples_node.action_decision_counts / samples_sum
            else:
                # No samples in this node: fall back to a uniform strategy.
                for a in strategy_node.children:
                    strategy_node.strategy[a] = 1 / len(
                        strategy_node.children)
    walk_trees(on_node, weak_opponent_samples_tree, weak_opponent_strategy_tree)

    self.assertTrue(is_correct_strategy(weak_opponent_strategy_tree))

    exploitability = Exploitability(game)
    num_test_counts = test_spec['test_counts']
    # data[run, 0, j]: exploitability of the trained DBR strategy;
    # data[run, 1, j]: value of evaluating the opponent strategy against it.
    data = np.zeros([num_test_counts, 2, len(P_MAX_VALUES)])
    for i in range(num_test_counts):
        print('%s/%s' % (i + 1, num_test_counts))

        for j, p_max in enumerate(P_MAX_VALUES):
            print('Pmax: %s - %s/%s' % (p_max, j + 1, len(P_MAX_VALUES)))

            dbr = DataBiasedResponse(game, weak_opponent_samples_tree, p_max=p_max)
            dbr.train(test_spec['training_iterations'])

            data[i, 0, j] = exploitability.evaluate(dbr.game_tree)
            data[i, 1, j] = exploitability.evaluate(weak_opponent_strategy_tree, dbr.game_tree)

            # The figure is redrawn after every training so that partial
            # results are available while the test is still running.
            # NOTE(review): a new figure is created on each pass and no
            # close call is visible here — confirm whether figures should
            # be closed after saving.
            plt.figure(dpi=160)
            for k in range(i + 1):
                # NOTE(review): k already indexes runs, so floor(k / 2)
                # labels consecutive runs identically — confirm intent.
                run_index = math.floor(k / 2)
                # Finished runs are plotted in full, the current run only
                # up to the p_max values evaluated so far.
                xdata = data[k, 0, :] if k < i or j == (len(P_MAX_VALUES) - 1) else data[k, 0, 0:j + 1]
                ydata = data[k, 1, :] if k < i or j == (len(P_MAX_VALUES) - 1) else data[k, 1, 0:j + 1]
                plt.plot(xdata,
                         ydata,
                         label='Run %s' % (run_index + 1),
                         marker='o',
                         linewidth=0.8)
            if 'title' in test_spec:
                plt.title(test_spec['title'])
            plt.xlabel('DBR trained strategy exploitability [mbb/g]')
            plt.ylabel(
                'Random opponent exploitation by DBR strategy [mbb/g]')
            plt.grid()
            if num_test_counts > 1:
                plt.legend()

            # Takes the second '/'-separated path component and drops its
            # last five characters (presumably the '.game' extension).
            game_name = test_spec['game_file_path'].split('/')[1][:-5]
            figure_output_path = '%s/%s(it:%s).png' % (
                FIGURES_FOLDER, game_name, test_spec['training_iterations'])

            figures_directory = os.path.dirname(figure_output_path)
            if not os.path.exists(figures_directory):
                os.makedirs(figures_directory)

            plt.savefig(figure_output_path)

    # '%' binds tighter than '+', so only the second literal is formatted.
    print('\033[91mThis test needs your assistance! ' +
          'Check the generated graph %s!\033[0m' % figure_output_path)