import itertools

import numpy as np

from gameanalysis import paygame
from gameanalysis import regret
from gameanalysis import rsgame


def test_pure_strategy_deviation_gains():
    """Test pure strategy deviation gains"""
    profiles = [[2, 0, 2, 0],
                [2, 0, 1, 1],
                [2, 0, 0, 2],
                [1, 1, 2, 0],
                [1, 1, 1, 1],
                [1, 1, 0, 2],
                [0, 2, 2, 0],
                [0, 2, 1, 1],
                [0, 2, 0, 2]]
    payoffs = [[1, 0, 2, 0],
               [3, 0, 4, 5],
               [6, 0, 0, 7],
               [8, 9, 10, 0],
               [11, 12, 13, 14],
               [15, 16, 0, 17],
               [0, 18, 19, 0],
               [0, 20, 21, 22],
               [0, 23, 0, 24]]
    game = paygame.game(2, [2, 2], profiles, payoffs)

    gains = regret.pure_strategy_deviation_gains(game, [2, 0, 2, 0])
    assert np.allclose(gains, [0, 8, 0, 0, 0, 3, 0, 0])
    gains = regret.pure_strategy_deviation_gains(game, [1, 1, 1, 1])
    assert np.allclose(gains, [0, 9, -9, 0, 0, 4, -4, 0])
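# A hand check of the first expected value above, sketched with
# `Game.get_payoffs` (an assumed accessor; any pure-profile payoff lookup
# works the same way). At [2, 0, 2, 0] a role-0 player on strategy 0 earns
# 1; deviating to strategy 1 lands in [1, 1, 2, 0], where strategy 1 pays
# 9, for a gain of 9 - 1 = 8.
def test_gain_entry_by_hand():
    """Sketch: verify gains[1] for profile [2, 0, 2, 0] by direct lookup"""
    profiles = [[2, 0, 2, 0], [1, 1, 2, 0]]
    payoffs = [[1, 0, 2, 0], [8, 9, 10, 0]]
    game = paygame.game(2, [2, 2], profiles, payoffs)
    before = game.get_payoffs([2, 0, 2, 0])[0]  # payoff to strategy 0: 1
    after = game.get_payoffs([1, 1, 2, 0])[1]   # payoff after deviating: 9
    assert after - before == 8  # matches gains[1] in the test above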
def gains(game, serial, prof):
    """the gains from deviating from profile"""
    if is_pure_profile(game, prof):
        gains = regret.pure_strategy_deviation_gains(game, prof)
        return serial.to_deviation_payoff_json(prof, gains)
    else:
        gains = regret.mixture_deviation_gains(game, prof)
        return serial.to_prof_json(gains, False)
def calc_gains(game, prof):
    """the gains from deviating from profile"""
    if is_pure_profile(game, prof):  # pylint: disable=no-else-return
        gains = regret.pure_strategy_deviation_gains(game, prof)
        return game.devpay_to_json(gains)
    else:
        gains = regret.mixture_deviation_gains(game, prof)
        return game.payoff_to_json(gains)
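# Both dispatchers above depend on an `is_pure_profile` helper defined
# elsewhere. A minimal sketch of plausible semantics (hypothetical, not the
# original definition): a pure profile has integral entries whose per-role
# sums equal the role player counts, while a mixture sums to one per role.
def is_pure_profile(game, prof):
    """Sketch: distinguish pure profiles from mixtures"""
    prof = np.asarray(prof, float)
    role_sums = np.add.reduceat(prof, game.role_starts)
    return (np.allclose(prof, np.round(prof)) and
            np.allclose(role_sums, game.num_role_players))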
def _gains(game):
    """Returns the deviation gains for every profile in the game

    Rows are stacked in the same order as `game.profiles()`. Entries for
    unplayed strategies are zero, which this variant does not distinguish
    from a genuine zero gain."""
    return np.stack([
        regret.pure_strategy_deviation_gains(game, prof)
        for prof in game.profiles()])
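# Usage sketch for the stacked variant above, assuming a complete game (no
# missing payoffs) and that `game.num_devs` counts the same flat deviation
# slots that `pure_strategy_deviation_gains` fills. Per-profile regret is
# then the largest gain in each row, clipped at zero.
def _profile_regrets(game):
    """Sketch: per-profile regret from the stacked gains"""
    all_gains = _gains(game)
    assert all_gains.shape == (game.num_profiles, game.num_devs)
    return np.maximum(all_gains, 0).max(1)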
def regret_matching(game, profile, *, slack=0.1):  # pylint: disable=too-many-locals
    """Regret matching

    Run regret matching. This selects new strategies to play proportionally
    to the gain they receive from deviating from the current profile.

    Parameters
    ----------
    game : Game
        The game to run regret matching on. The game must support payoff
        queries for pure profiles.
    profile : array_like
        The initial profile to start with.
    slack : float, optional
        Slack in the normalization that guarantees agents always have
        positive probability of playing their last played strategy. Must be
        in (0, 1).
    """
    strat_players = game.num_role_players.repeat(
        game.num_role_strats).astype(float)
    profile = np.asarray(profile, int)
    mean_gains = np.zeros(game.num_devs)
    mean_mix = profile / strat_players
    mus = np.full(game.num_roles, np.finfo(float).tiny)
    for i in itertools.count(1):  # pragma: no branch
        # Regret matching: update the running mean of deviation gains and
        # track mu, the per-role max of summed positive gains, in place
        gains = regret.pure_strategy_deviation_gains(game, profile)
        gains *= profile.repeat(game.num_strat_devs)
        mean_gains += (gains - mean_gains) / i
        np.maximum(
            np.maximum.reduceat(
                np.add.reduceat(np.maximum(gains, 0), game.dev_strat_starts),
                game.role_starts),
            mus, mus)

        # For each strategy sample from regret matching distribution
        new_profile = np.zeros(game.num_strats, int)
        for rgains, prof, nprof, norm, strats in zip(
                np.split(np.maximum(mean_gains, 0), game.dev_role_starts[1:]),
                np.split(profile, game.role_starts[1:]),
                np.split(new_profile, game.role_starts[1:]),
                mus * (1 + slack),
                game.num_role_strats):
            probs = rgains / norm
            probs[np.arange(0, probs.size, strats + 1)] = 1 - np.add.reduceat(
                probs, np.arange(0, probs.size, strats))
            for count, prob in zip(prof, np.split(probs, strats)):
                nprof += np.random.multinomial(count, prob)

        # Update the running mean mixture; the caller tests for convergence
        profile = new_profile
        mean_mix += (profile / strat_players - mean_mix) / (i + 1)
        yield mean_mix
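# A hypothetical driver for the generator above. The yielded `mean_mix`
# array is mutated in place on the next iteration, so a consumer that keeps
# iterates around must copy them. All names below are illustrative.
def run_regret_matching(game, profile, *, tol=1e-4, max_iters=10_000):
    """Sketch: iterate regret matching until the mean mixture stabilizes"""
    last = None
    for i, mix in enumerate(regret_matching(game, profile)):
        mix = mix.copy()  # the generator reuses the yielded array
        if i >= max_iters or (
                last is not None and np.allclose(mix, last, atol=tol)):
            return mix
        last = mix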
def _gains(game):
    """Returns the gains for deviating for every profile in the game

    Also returns the profile supports for indexing when the gains array
    should be zero because it's invalid versus having an actual zero
    gain."""
    # Each strategy owns num_strategies - 1 contiguous deviation slots in
    # the flat gains array
    sizes = np.repeat(game.num_strategies - 1, game.num_strategies)
    offsets = np.insert(sizes.cumsum(), 0, 0)
    size = offsets[-1]
    offsets = offsets[:-1]
    gains = np.zeros((game.num_profiles, size))
    supports = game.profiles > 0
    for i, (prof, support) in enumerate(zip(game.profiles, supports)):
        regs = regret.pure_strategy_deviation_gains(game, prof)
        # Scatter the computed gains into the slots of supported strategies
        reps = game.num_strategies[game.role_index[support]] - 1
        reg_offsets = np.insert(reps[:-1].cumsum(), 0, 0)
        inds = (np.repeat(offsets[support] - reg_offsets, reps) +
                np.arange(regs.size))
        gains[i, inds] = regs
    return gains, supports
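# Usage sketch for the (gains, supports) pair above, assuming the same
# old-style API (`game.num_strategies`, `game.profiles` as an attribute).
# Expanding the support mask over each strategy's deviation slots separates
# padding zeros from genuine zero gains.
def _masked_max_gains(game):
    """Sketch: per-profile maximum gain, ignoring invalid slots"""
    gains, supports = _gains(game)
    slot_sizes = np.repeat(game.num_strategies - 1, game.num_strategies)
    dev_mask = np.repeat(supports, slot_sizes, axis=1)  # valid-entry mask
    return np.where(dev_mask, gains, -np.inf).max(1)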
def test_empty_pure_strategy_deviation_gains():
    """Test empty pure strategy deviation gains"""
    game = rsgame.empty(2, [2, 2])
    gains = regret.pure_strategy_deviation_gains(game, [2, 0, 2, 0])
    # Deviations from played strategies have unknown payoffs, hence nan;
    # unplayed strategies contribute zero gain by definition
    expected = [np.nan, np.nan, 0, 0, np.nan, np.nan, 0, 0]
    assert np.allclose(gains, expected, equal_nan=True)