Example #1
import numpy as np
from gameanalysis import paygame, regret


def test_pure_strategy_deviation_gains():
    """Test pure strategy deviation gains"""
    profiles = [[2, 0, 2, 0],
                [2, 0, 1, 1],
                [2, 0, 0, 2],
                [1, 1, 2, 0],
                [1, 1, 1, 1],
                [1, 1, 0, 2],
                [0, 2, 2, 0],
                [0, 2, 1, 1],
                [0, 2, 0, 2]]
    payoffs = [[1, 0, 2, 0],
               [3, 0, 4, 5],
               [6, 0, 0, 7],
               [8, 9, 10, 0],
               [11, 12, 13, 14],
               [15, 16, 0, 17],
               [0, 18, 19, 0],
               [0, 20, 21, 22],
               [0, 23, 0, 24]]
    game = paygame.game(2, [2, 2], profiles, payoffs)

    gains = regret.pure_strategy_deviation_gains(game, [2, 0, 2, 0])
    assert np.allclose(gains, [0, 8, 0, 0, 0, 3, 0, 0])
    gains = regret.pure_strategy_deviation_gains(game, [1, 1, 1, 1])
    assert np.allclose(gains, [0, 9, -9, 0, 0, 4, -4, 0])
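
To see where the expected 8 comes from: a deviation gain is the deviator's payoff in the post-deviation profile minus its payoff in the original profile. A minimal check using only the profiles and payoffs tables defined above:

current_pay = payoffs[profiles.index([2, 0, 2, 0])][0]    # payoff to strategy 0: 1
deviation_pay = payoffs[profiles.index([1, 1, 2, 0])][1]  # payoff to strategy 1 after one player deviates: 9
assert deviation_pay - current_pay == 8                   # matches gains[1] above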
Example #2
def gains(game, serial, prof):
    """the gains from deviating from profile"""
    if is_pure_profile(game, prof):
        gains = regret.pure_strategy_deviation_gains(game, prof)
        return serial.to_deviation_payoff_json(prof, gains)
    else:
        gains = regret.mixture_deviation_gains(game, prof)
        return serial.to_prof_json(gains, False)
Example #3
def calc_gains(game, prof):
    """the gains from deviating from profile"""
    if is_pure_profile(game, prof): # pylint: disable=no-else-return
        gains = regret.pure_strategy_deviation_gains(game, prof)
        return game.devpay_to_json(gains)
    else:
        gains = regret.mixture_deviation_gains(game, prof)
        return game.payoff_to_json(gains)
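
Both wrappers above branch on an is_pure_profile helper that is not shown. A hypothetical sketch of such a check (not the project's actual implementation), assuming the array API used elsewhere on this page where game.num_role_players holds the per-role player counts: a pure profile contains player counts that sum to the total number of players, while a mixture contains per-role probabilities that sum to the number of roles.

import numpy as np


def is_pure_profile(game, prof):
    """Hypothetical check for whether prof is a pure profile or a mixture"""
    prof = np.asarray(prof, float)
    # Pure profiles sum to the total player count; mixtures sum to num_roles
    return np.isclose(prof.sum(), game.num_role_players.sum())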
Example #4
def _gains(game):
    """Returns the gains for deviating for every profile in the game

    Also returns the profile supports for indexing when the gains array should
    be zero because it's invalid versus having an actual zero gain."""
    return np.stack([
        regret.pure_strategy_deviation_gains(game, prof)
        for prof in game.profiles()])
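
For illustration, assuming game is the two-role, two-strategy paygame built in Example #1 and the num_profiles / num_devs attributes used elsewhere on this page, the stacked result has one row per stored profile and one column per deviation pair:

all_gains = _gains(game)
assert all_gains.shape == (game.num_profiles, game.num_devs)  # (9, 8) for the Example #1 game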
Example #5
def regret_matching(game, profile, *, slack=0.1):  # pylint: disable=too-many-locals
    """Regret matching

    Run regret matching. This selects new strategies to play proportionally to
    the gain they receive from deviating from the current profile.

    Parameters
    ----------
    game : Game
        The game to run regret matching on. Game must support
        `deviation_payoffs`.
    profile : array_like
        The initial profile to start with.
    slack : float, optional
        Extra slack in the normalization that keeps some probability on each
        agent's currently played strategy. Must lie in (0, 1).
    """
    strat_players = game.num_role_players.repeat(
        game.num_role_strats).astype(float)

    profile = np.asarray(profile, int)
    mean_gains = np.zeros(game.num_devs)
    mean_mix = profile / strat_players
    mus = np.full(game.num_roles, np.finfo(float).tiny)

    for i in itertools.count(1):  # pragma: no branch
        # Regret matching
        gains = regret.pure_strategy_deviation_gains(game, profile)
        gains *= profile.repeat(game.num_strat_devs)
        mean_gains += (gains - mean_gains) / i
        np.maximum(
            np.maximum.reduceat(
                np.add.reduceat(np.maximum(gains, 0), game.dev_strat_starts),
                game.role_starts), mus, mus)

        # For each strategy sample from regret matching distribution
        new_profile = np.zeros(game.num_strats, int)
        for rgains, prof, nprof, norm, strats in zip(
                np.split(np.maximum(mean_gains, 0), game.dev_role_starts[1:]),
                np.split(profile, game.role_starts[1:]),
                np.split(new_profile, game.role_starts[1:]), mus * (1 + slack),
                game.num_role_strats):
            probs = rgains / norm
            probs[np.arange(0, probs.size, strats + 1)] = 1 - np.add.reduceat(
                probs, np.arange(0, probs.size, strats))
            for count, prob in zip(prof, np.split(probs, strats)):
                nprof += np.random.multinomial(count, prob)

        # Update the profile and the running average mixture
        profile = new_profile
        mean_mix += (profile / strat_players - mean_mix) / (i + 1)
        yield mean_mix
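
A hypothetical usage sketch: regret_matching is a generator that yields the running average mixture after each round, so a caller typically draws a fixed number of iterations and keeps the last yielded value. Here game and profile stand in for a gameanalysis game and a starting pure profile, as in the examples above.

import itertools

mix = None
for mix in itertools.islice(regret_matching(game, profile, slack=0.1), 1000):
    pass
# mix now holds the time-averaged mixture after 1000 rounds of regret matching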
Example #6
def _gains(game):
    """Returns the gains for deviating for every profile in the game

    Also returns the profile supports for indexing when the gains array should
    be zero because it's invalid versus having an actual zero gain."""
    # One column per (strategy, deviation target) pair, excluding
    # self-deviations, laid out role by role
    sizes = np.repeat(game.num_strategies - 1, game.num_strategies)
    offsets = np.insert(sizes.cumsum(), 0, 0)
    size = offsets[-1]
    offsets = offsets[:-1]
    gains = np.zeros((game.num_profiles, size))
    supports = game.profiles > 0

    for i, (prof, support) in enumerate(zip(game.profiles, supports)):
        regs = regret.pure_strategy_deviation_gains(game, prof)
        # Number of deviation targets for each strategy in the support
        reps = game.num_strategies[game.role_index[support]] - 1
        reg_offsets = np.insert(reps[:-1].cumsum(), 0, 0)
        # Scatter the computed gains into the columns of supported strategies
        inds = (np.repeat(offsets[support] - reg_offsets, reps) +
                np.arange(regs.size))
        gains[i, inds] = regs

    return gains, supports
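
An illustrative way to use the returned pair, relying only on the same attributes the function itself uses (game.num_strategies, game.profiles): expanding the supports to the padded column layout separates entries that are zero because the deviating strategy is unplayed from genuine zero gains.

all_gains, supports = _gains(game)
sizes = np.repeat(game.num_strategies - 1, game.num_strategies)
played = np.repeat(supports, sizes, axis=1)        # align supports with the gain columns
meaningful = np.where(played, all_gains, np.nan)   # nan marks invalid (unplayed) entries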
Example #7
import numpy as np
from gameanalysis import regret, rsgame


def test_empty_pure_strategy_deviation_gains():
    """Test empty pure strategy deviation gains"""
    game = rsgame.empty(2, [2, 2])
    gains = regret.pure_strategy_deviation_gains(game, [2, 0, 2, 0])
    expected = [np.nan, np.nan, 0, 0, np.nan, np.nan, 0, 0]
    assert np.allclose(gains, expected, equal_nan=True)
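
The nans follow from the empty game storing no payoff data: any gain that needs a payoff for a played strategy is undefined, while entries attributed to unplayed strategies stay at zero. A quick check of the two groups of indices in the gains array from the test above:

assert np.isnan(gains[[0, 1, 4, 5]]).all()  # deviations of the played strategies
assert np.all(gains[[2, 3, 6, 7]] == 0)     # unplayed strategies contribute zero gain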