Example #1
import numpy as np
from gameanalysis import paygame, regret


def test_pure_strategy_deviation_gains():
    """Test pure strategy deviation gains"""
    profiles = [[2, 0, 2, 0],
                [2, 0, 1, 1],
                [2, 0, 0, 2],
                [1, 1, 2, 0],
                [1, 1, 1, 1],
                [1, 1, 0, 2],
                [0, 2, 2, 0],
                [0, 2, 1, 1],
                [0, 2, 0, 2]]
    payoffs = [[1, 0, 2, 0],
               [3, 0, 4, 5],
               [6, 0, 0, 7],
               [8, 9, 10, 0],
               [11, 12, 13, 14],
               [15, 16, 0, 17],
               [0, 18, 19, 0],
               [0, 20, 21, 22],
               [0, 23, 0, 24]]
    game = paygame.game(2, [2, 2], profiles, payoffs)

    gains = regret.pure_strategy_deviation_gains(game, [2, 0, 2, 0])
    assert np.allclose(gains, [0, 8, 0, 0, 0, 3, 0, 0])
    gains = regret.pure_strategy_deviation_gains(game, [1, 1, 1, 1])
    assert np.allclose(gains, [0, 9, -9, 0, 0, 4, -4, 0])
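
To see where the expected 8 comes from: a deviation gain is the deviator's payoff in the post-deviation profile minus its payoff in the original profile. A minimal check using only the profiles and payoffs tables defined above:

current_pay = payoffs[profiles.index([2, 0, 2, 0])][0]    # payoff to strategy 0: 1
deviation_pay = payoffs[profiles.index([1, 1, 2, 0])][1]  # payoff to strategy 1 after one player deviates: 9
assert deviation_pay - current_pay == 8                   # matches gains[1] above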
Example #2
def gains(game, serial, prof):
    """the gains from deviating from profile"""
    if is_pure_profile(game, prof):
        gains = regret.pure_strategy_deviation_gains(game, prof)
        return serial.to_deviation_payoff_json(prof, gains)
    else:
        gains = regret.mixture_deviation_gains(game, prof)
        return serial.to_prof_json(gains, False)
Example #3
def calc_gains(game, prof):
    """the gains from deviating from profile"""
    if is_pure_profile(game, prof): # pylint: disable=no-else-return
        gains = regret.pure_strategy_deviation_gains(game, prof)
        return game.devpay_to_json(gains)
    else:
        gains = regret.mixture_deviation_gains(game, prof)
        return game.payoff_to_json(gains)
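
Both wrappers above branch on an is_pure_profile helper that is not shown. A hypothetical sketch of such a check (not the project's actual implementation), assuming the array API used elsewhere on this page where game.num_role_players holds the per-role player counts: a pure profile contains player counts that sum to the total number of players, while a mixture contains per-role probabilities that sum to the number of roles.

import numpy as np


def is_pure_profile(game, prof):
    """Hypothetical check for whether prof is a pure profile or a mixture"""
    prof = np.asarray(prof, float)
    # Pure profiles sum to the total player count; mixtures sum to num_roles
    return np.isclose(prof.sum(), game.num_role_players.sum())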
Example #4
def _gains(game):
    """Returns the gains for deviating for every profile in the game

    Also returns the profile supports for indexing when the gains array should
    be zero because it's invalid versus having an actual zero gain."""
    return np.stack([
        regret.pure_strategy_deviation_gains(game, prof)
        for prof in game.profiles()])
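
For illustration, assuming game is the two-role, two-strategy paygame built in Example #1 and the num_profiles / num_devs attributes used elsewhere on this page, the stacked result has one row per stored profile and one column per deviation pair:

all_gains = _gains(game)
assert all_gains.shape == (game.num_profiles, game.num_devs)  # (9, 8) for the Example #1 game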
Example #5
def regret_matching(game, profile, *, slack=0.1):  # pylint: disable=too-many-locals
    """Regret matching

    Run regret matching. This selects new strategies to play proportionally to
    the gain they receive from deviating from the current profile.

    Parameters
    ----------
    game : Game
        The game to run regret matching on. Game must support
        `deviation_payoffs`.
    profile : array_like
        The initial profile to start with.
    slack : float, optional
        Extra slack in the normalization that keeps some probability on each
        agent's currently played strategy. Must lie in (0, 1).
    """
    strat_players = game.num_role_players.repeat(
        game.num_role_strats).astype(float)

    profile = np.asarray(profile, int)
    mean_gains = np.zeros(game.num_devs)
    mean_mix = profile / strat_players
    mus = np.full(game.num_roles, np.finfo(float).tiny)

    for i in itertools.count(1):  # pragma: no branch
        # Regret matching
        gains = regret.pure_strategy_deviation_gains(game, profile)
        gains *= profile.repeat(game.num_strat_devs)
        mean_gains += (gains - mean_gains) / i
        np.maximum(
            np.maximum.reduceat(
                np.add.reduceat(np.maximum(gains, 0), game.dev_strat_starts),
                game.role_starts), mus, mus)

        # For each strategy sample from regret matching distribution
        new_profile = np.zeros(game.num_strats, int)
        for rgains, prof, nprof, norm, strats in zip(
                np.split(np.maximum(mean_gains, 0), game.dev_role_starts[1:]),
                np.split(profile, game.role_starts[1:]),
                np.split(new_profile, game.role_starts[1:]), mus * (1 + slack),
                game.num_role_strats):
            probs = rgains / norm
            probs[np.arange(0, probs.size, strats + 1)] = 1 - np.add.reduceat(
                probs, np.arange(0, probs.size, strats))
            for count, prob in zip(prof, np.split(probs, strats)):
                nprof += np.random.multinomial(count, prob)

        # Update the profile and the running average mixture
        profile = new_profile
        mean_mix += (profile / strat_players - mean_mix) / (i + 1)
        yield mean_mix
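
A hypothetical usage sketch: regret_matching is a generator that yields the running average mixture after each round, so a caller typically draws a fixed number of iterations and keeps the last yielded value. Here game and profile stand in for a gameanalysis game and a starting pure profile, as in the examples above.

import itertools

mix = None
for mix in itertools.islice(regret_matching(game, profile, slack=0.1), 1000):
    pass
# mix now holds the time-averaged mixture after 1000 rounds of regret matching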
Example #6
def _gains(game):
    """Returns the gains for deviating for every profile in the game

    Also returns the profile supports for indexing when the gains array should
    be zero because it's invalid versus having an actual zero gain."""
    # One column per (strategy, deviation target) pair, excluding
    # self-deviations, laid out role by role
    sizes = np.repeat(game.num_strategies - 1, game.num_strategies)
    offsets = np.insert(sizes.cumsum(), 0, 0)
    size = offsets[-1]
    offsets = offsets[:-1]
    gains = np.zeros((game.num_profiles, size))
    supports = game.profiles > 0

    for i, (prof, support) in enumerate(zip(game.profiles, supports)):
        regs = regret.pure_strategy_deviation_gains(game, prof)
        # Number of deviation targets for each strategy in the support
        reps = game.num_strategies[game.role_index[support]] - 1
        reg_offsets = np.insert(reps[:-1].cumsum(), 0, 0)
        # Scatter the computed gains into the columns of supported strategies
        inds = (np.repeat(offsets[support] - reg_offsets, reps) +
                np.arange(regs.size))
        gains[i, inds] = regs

    return gains, supports
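
An illustrative way to use the returned pair, relying only on the same attributes the function itself uses (game.num_strategies, game.profiles): expanding the supports to the padded column layout separates entries that are zero because the deviating strategy is unplayed from genuine zero gains.

all_gains, supports = _gains(game)
sizes = np.repeat(game.num_strategies - 1, game.num_strategies)
played = np.repeat(supports, sizes, axis=1)        # align supports with the gain columns
meaningful = np.where(played, all_gains, np.nan)   # nan marks invalid (unplayed) entries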
Example #7
import numpy as np
from gameanalysis import regret, rsgame


def test_empty_pure_strategy_deviation_gains():
    """Test empty pure strategy deviation gains"""
    game = rsgame.empty(2, [2, 2])
    gains = regret.pure_strategy_deviation_gains(game, [2, 0, 2, 0])
    expected = [np.nan, np.nan, 0, 0, np.nan, np.nan, 0, 0]
    assert np.allclose(gains, expected, equal_nan=True)
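
The nans follow from the empty game storing no payoff data: any gain that needs a payoff for a played strategy is undefined, while entries attributed to unplayed strategies stay at zero. A quick check of the two groups of indices in the gains array from the test above:

assert np.isnan(gains[[0, 1, 4, 5]]).all()  # deviations of the played strategies
assert np.all(gains[[2, 3, 6, 7]] == 0)     # unplayed strategies contribute zero gain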