def test_random_trace_equilibria(base):
    """Trace equilibria between two random games in both directions

    Equilibria of each endpoint game are traced towards the other endpoint,
    and every point returned by the trace must have low regret in the
    corresponding mixture of the two games.
    """
    game0 = gamegen.poly_aggfn(base.num_role_players, base.num_role_strats, 6)
    game1 = gamegen.poly_aggfn(base.num_role_players, base.num_role_strats, 6)

    # (game supplying the equilibria, starting probability, target probability)
    directions = [(game0, 0, 1), (game1, 1, 0)]
    for source, p_start, p_end in directions:
        candidates = source.trim_mixture_support(
            nash.mixed_nash(source, regret_thresh=1e-4))
        for eqm in candidates:
            if regret.mixture_regret(source, eqm) > 1e-3:
                # trimmed equilibrium had too high of regret...
                continue  # pragma: no cover
            probs, mixes = trace.trace_equilibrium(
                game0, game1, p_start, eqm, p_end)
            for prob, mix in zip(probs, mixes):
                mixed_game = rsgame.mix(game0, game1, prob)
                assert regret.mixture_regret(mixed_game, mix) <= 1.1e-3
def test_minreg_grid_roshambo():
    """Test minimum regret grid search on rock paper scissors"""
    game = gamegen.rock_paper_scissors()

    # Three points per dimension is not enough to reach the equilibrium
    coarse = nash.min_regret_grid_mixture(game, 3)
    coarse_reg = regret.mixture_regret(game, coarse)
    assert np.isclose(coarse_reg, .5), \
        "min regret grid didn't find [.5, .5, 0] profile with regret .5"

    # Four points per dimension hits the equilibrium perfectly
    fine = nash.min_regret_grid_mixture(game, 4)
    fine_reg = regret.mixture_regret(game, fine)
    assert np.isclose(fine_reg, 0), \
        "min regret grid didn't find equilibrium"
def trace_interpolate(game0, game1, peqs, eqa, targets, **kwargs):  # pylint: disable=too-many-locals
    """Compute equilibria at specific probabilities along a trace

    For each target, the two neighboring points of the trace are traced
    towards the target with `trace_equilibrium`. If both traces reach the
    target, the end point with lower regret in the mixed game is kept;
    otherwise whichever one reached it is used.

    Parameters
    ----------
    game0 : RsGame
        The game to get data from when the mixture probability is 0.
    game1 : RsGame
        The game to get data from when the mixture probability is 1.
    peqs : [float]
        A parallel list of probabilities for each equilibria in a continuous
        trace. May be sorted in either direction.
    eqa : [eqm]
        A parallel list of equilibria for each probability representing
        continuous equilibria for prob mixture games.
    targets : [float]
        The probabilities to compute an equilibria at. They must lie within
        the range covered by `peqs`.
    kwargs : options
        The same options as `trace_equilibrium`.

    Returns
    -------
    result : ndarray
        Array with one row per target, in the order `targets` was given, and
        `game0.num_strats` columns.

    Raises
    ------
    ValueError
        If neither neighboring trace reached a target probability.
    """
    peqs = np.asarray(peqs, float)
    eqa = np.asarray(eqa, float)
    targets = np.asarray(targets, float)

    # Normalize to ascending order so a single forward scan suffices
    if np.all(np.diff(peqs) <= 0):
        peqs = peqs[::-1]
        eqa = eqa[::-1]
    order = np.argsort(targets)
    targets = targets[order]

    utils.check(np.all(np.diff(peqs) >= 0),
                'trace probabilities must be sorted')
    utils.check(peqs[0] <= targets[0] and targets[-1] <= peqs[-1],
                'targets must be internal to trace')

    result = np.empty((targets.size, game0.num_strats))
    segments = zip(utils.subsequences(peqs), utils.subsequences(eqa))
    (p_lo, p_hi), (eqm_lo, eqm_hi) = next(segments)
    for target, ind in zip(targets, order):
        # Advance to the trace segment that contains this target
        while target > p_hi:
            (p_lo, p_hi), (eqm_lo, eqm_hi) = next(segments)

        # Trace towards the target from both ends of the segment and keep
        # only the final point of each trace
        probs_lo, mixes_lo = trace_equilibrium(
            game0, game1, p_lo, eqm_lo, target, **kwargs)
        reached_lo, mix_lo = probs_lo[-1], mixes_lo[-1]
        probs_hi, mixes_hi = trace_equilibrium(
            game0, game1, p_hi, eqm_hi, target, **kwargs)
        reached_hi, mix_hi = probs_hi[-1], mixes_hi[-1]

        lo_ok = np.isclose(reached_lo, target)
        hi_ok = np.isclose(reached_hi, target)
        if lo_ok and hi_ok:
            mixgame = rsgame.mix(game0, game1, target)
            reg_lo = regret.mixture_regret(mixgame, mix_lo)
            reg_hi = regret.mixture_regret(mixgame, mix_hi)
            # Ties go to the trace from the lower end of the segment
            result[ind] = mix_hi if reg_hi < reg_lo else mix_lo
        elif lo_ok:
            result[ind] = mix_lo
        elif hi_ok:
            result[ind] = mix_hi
        else:  # pragma: no cover
            raise ValueError('ode solving failed to reach prob')
    return result
def scarfs_algorithm(game, mix, *, regret_thresh=1e-2, disc=8):
    """Find a Nash equilibrium with a fixed point method

    This is guaranteed to find an equilibrium with regret below
    `regret_thresh` if given enough time. However, that guarantee comes with
    potentially exponential running time, so it is not recommended unless
    you're willing to wait. The underlying algorithm solves for an
    approximate Nash fixed point at greater and greater discretization until
    the regret of the result is below the threshold.

    This is a generator: it yields the mixture found after every fixed point
    solve, and then yields the final mixture one more time, so two identical
    yields in a row signal convergence.

    Arguments
    ---------
    game : Game
        The game to find an equilibrium of. Game must support
        `deviation_payoffs`.
    mix : mixture
        The mixture to start the search from.
    regret_thresh : float, optional
        The maximum regret of the returned mixture.
    disc : int, optional
        The initial discretization of the mixture; values above 8 are
        reduced to 8. A lower initial discretization means fewer possible
        starting points for search in the mixture space, but is likely to
        converge faster as the search at higher discretization will be
        seeded with an approximate equilibrium from a lower discretization.
        For example, with `disc=2` there are only `game.num_strats -
        game.num_roles + 1` possible starting points.
    """
    def eqa_func(mixture):
        """Equilibrium fixed point function"""
        current = game.mixture_from_simplex(mixture)
        gains = np.maximum(regret.mixture_deviation_gains(game, current), 0)
        # Normalize each role by one plus the total positive gain of the role
        role_gains = np.add.reduceat(gains, game.role_starts)
        normalizer = 1 + role_gains.repeat(game.num_role_strats)
        return game.mixture_to_simplex((current + gains) / normalizer)

    disc = min(disc, 8)
    reg = regret.mixture_regret(game, mix)
    while reg > regret_thresh:
        seed = game.mixture_to_simplex(mix)
        solution = fixedpoint.fixed_point(eqa_func, seed, disc=disc)
        mix = game.mixture_from_simplex(solution)
        reg = regret.mixture_regret(game, mix)
        disc *= 2
        yield mix

    # Two yields in a row means convergence
    yield mix
def test_regret_minimize_failure():
    """Test that regret minimization ends at a high regret mixture"""
    table = [
        [-.7, -.9, -1.3, -2],
        [-.7, -.9, -1.1, -1.6],
        [-.2, -.4, -.8, -1.5],
    ]
    game = aggfn.aggfn(3, 3, np.eye(3), np.eye(3, dtype=bool), table)
    start = [0.9, 0.05, 0.05]
    result = nash.regret_minimize(game, start)
    final_regret = regret.mixture_regret(game, result)
    assert final_regret > 0.1
def test_mixed_prisoners_dilemma(_):
    """Test that the known prisoners dilemma equilibrium has zero regret"""
    # prisoners dilemma
    game = gamegen.sym_2p2s_game(2, 0, 3, 1)
    known_eqm = [0, 1]
    reg = regret.mixture_regret(game, known_eqm)
    assert reg == 0, \
        'Known symmetric mixed was not zero regret'
def _smooth_trace(game0, game1, probs, eqa, trace_args):
    """Smooth the equilibria in a trace in place

    For every pair of adjacent points, the equilibrium of the first point is
    traced to the probability of the second. If that trace reaches the
    second probability with lower regret than the stored equilibrium, the
    stored equilibrium is overwritten. This only goes in one direction, so
    it should be repeated for reversed views.
    """
    prob_pairs = utils.subsequences(probs)
    eqm_pairs = utils.subsequences(eqa)
    for (p_src, p_dst), (eqm_src, eqm_dst) in zip(prob_pairs, eqm_pairs):
        traced_probs, traced_mixes = trace.trace_equilibrium(
            game0, game1, p_src, eqm_src, p_dst, **trace_args)
        # Only the final point of the trace is relevant
        p_reached, eqm_reached = traced_probs[-1], traced_mixes[-1]
        if not np.isclose(p_reached, p_dst):
            # The trace stopped short of the neighbor, nothing to compare
            continue

        mixgame = rsgame.mix(game0, game1, p_dst)
        stored_reg = regret.mixture_regret(mixgame, eqm_dst)
        traced_reg = regret.mixture_regret(mixgame, eqm_reached)
        if traced_reg < stored_reg:
            np.copyto(eqm_dst, eqm_reached)
def scarfs_algorithm(game, mix, *, regret_thresh=1e-2, disc=8):
    """Find a Nash equilibrium with a fixed point method

    This is guaranteed to find an equilibrium with regret below
    `regret_thresh` if given enough time. However, that guarantee comes with
    potentially exponential running time, so it is not recommended unless
    you're willing to wait. The underlying algorithm solves for an
    approximate Nash fixed point at greater and greater discretization until
    the regret of the result is below the threshold.

    This is a generator: it yields the mixture found after every fixed point
    solve, and then yields the final mixture one more time, so two identical
    yields in a row signal convergence.

    Arguments
    ---------
    game : Game
        The game to find an equilibrium of. Game must support
        `deviation_payoffs`.
    mix : mixture
        The mixture to start the search from.
    regret_thresh : float, optional
        The maximum regret of the returned mixture.
    disc : int, optional
        The initial discretization of the mixture; values above 8 are
        reduced to 8. A lower initial discretization means fewer possible
        starting points for search in the mixture space, but is likely to
        converge faster as the search at higher discretization will be
        seeded with an approximate equilibrium from a lower discretization.
        For example, with `disc=2` there are only `game.num_strats -
        game.num_roles + 1` possible starting points.
    """
    def eqa_func(mixture):
        """Equilibrium fixed point function"""
        current = game.mixture_from_simplex(mixture)
        gains = np.maximum(regret.mixture_deviation_gains(game, current), 0)
        # Normalize each role by one plus the total positive gain of the role
        role_gains = np.add.reduceat(gains, game.role_starts)
        normalizer = 1 + role_gains.repeat(game.num_role_strats)
        return game.mixture_to_simplex((current + gains) / normalizer)

    disc = min(disc, 8)
    reg = regret.mixture_regret(game, mix)
    while reg > regret_thresh:
        seed = game.mixture_to_simplex(mix)
        solution = fixedpoint.fixed_point(eqa_func, seed, disc=disc)
        mix = game.mixture_from_simplex(solution)
        reg = regret.mixture_regret(game, mix)
        disc *= 2
        yield mix

    # Two yields in a row means convergence
    yield mix
def process(eqm):
    """Record a candidate mixture in the enclosing search state

    The mixture is appended to `equilibria` when its regret is at most
    `regret_thresh` and it is at least `dist_thresh` away from every mixture
    already stored. Independently, `best` is updated whenever the mixture
    has strictly lower regret than the best seen so far.
    """
    reg = regret.mixture_regret(game, eqm)

    if reg <= regret_thresh:
        is_distinct = all(
            linalg.norm(known - eqm) >= dist_thresh for known in equilibria)
        if is_distinct:
            equilibria.append(eqm[None])

    if reg < best[0]:
        best[0], best[1] = reg, eqm[None]
def test_sym_2p2s_known_eq(eq_prob):
    """Test that a generated 2x2 symmetric game has the requested equilibrium

    Checks the structure of the generated game, that the mixture
    `[eq_prob, 1 - eq_prob]` has zero regret, and that pure mixtures are not
    equilibria unless they coincide with the requested one.
    """
    game = gamegen.sym_2p2s_known_eq(eq_prob)
    assert game.is_complete(), "didn't generate a full game"
    assert game.is_symmetric(), \
        "didn't generate a symmetric game"
    # The failure message previously claimed this was a strategy count check
    assert np.all(game.num_players == 2), \
        "didn't generate correct number of players"
    assert np.all(game.num_strategies == 2), \
        "didn't generate correct number of strategies"

    eqm = np.array([eq_prob, 1 - eq_prob])
    reg = regret.mixture_regret(game, eqm)
    assert np.isclose(reg, 0), \
        "expected equilibrium wasn't an equilibrium, reg: {}".format(reg)

    for non_eqm in game.pure_mixtures():
        reg = regret.mixture_regret(game, non_eqm)
        # If eq_prob is 0 or 1, then pure is the desired mixture
        assert non_eqm[0] == eq_prob or not np.isclose(reg, 0), \
            "pure mixtures was equilibrium, {} {}".format(non_eqm, reg)
def test_old_nash_min_reg():
    """Test that old nash returns the minimum regret mixture on failure

    With zero replicator iterations and `min_reg` set, a single mixture is
    still returned even though its regret is not small.
    """
    eq_prob = 1 / np.sqrt(2)
    game = gamegen.sym_2p2s_known_eq(eq_prob)
    replicator_opts = {'max_iters': 0}
    eqa = nash.mixed_nash(game, replicator=replicator_opts, min_reg=True)
    assert eqa.shape == (1, 2)
    [eqm] = eqa
    assert regret.mixture_regret(game, eqm) > 1e-3
def main(args):
    """Entry point for learning script

    Trains an rbf game from `args.input`, computes its mixed equilibria, and
    writes a text report followed by a json summary to `args.output`.
    """
    # Record warnings raised during training so they can be reported below
    with warnings.catch_warnings(record=True) as warns:
        game = learning.rbfgame_train(gamereader.load(args.input))

    replicator_opts = {
        'max_iters': args.max_iters,
        'converge_thresh': args.converge_thresh,
    }
    methods = {'replicator': replicator_opts, 'optimize': {}}

    found = nash.mixed_nash(
        game, regret_thresh=args.regret_thresh, dist_thresh=args.dist_thresh,
        processes=args.processes, at_least_one=args.one, **methods)
    mixed_equilibria = game.trim_mixture_support(
        found, thresh=args.supp_thresh)
    equilibria = [(eqm, regret.mixture_regret(game, eqm))
                  for eqm in mixed_equilibria]

    out = args.output

    # Output game
    out.write('Game Learning\n')
    out.write('=============\n')
    out.write(str(game))
    out.write('\n\n')

    poor_fit = any(
        w.category == UserWarning and w.message.args[0] == (
            'some lengths were at their bounds, this may indicate a poor '
            'fit')
        for w in warns)
    if poor_fit:
        out.write('Warning\n')
        out.write('=======\n')
        out.write(
            'Some length scales were at their limit. This is a strong\n'
            'indication that a good representation was not found.\n')
        out.write('\n\n')

    # Output Equilibria
    out.write('Equilibria\n')
    out.write('----------\n')

    if not equilibria:
        out.write('Found no equilibria\n\n')
    else:
        suffix = 'um' if len(equilibria) == 1 else 'a'
        out.write('Found {:d} equilibri{}\n\n'.format(
            len(equilibria), suffix))
        for i, (eqm, reg) in enumerate(equilibria, 1):
            out.write('Equilibrium {:d}:\n'.format(i))
            out.write(game.mixture_to_str(eqm))
            out.write('\nRegret: {:.4f}\n\n'.format(reg))

    out.write('\n')

    # Output json data
    out.write('Json Data\n')
    out.write('=========\n')
    json_data = {
        'equilibria': [game.mixture_to_json(eqm) for eqm, _ in equilibria]}
    json.dump(json_data, out)
    out.write('\n')
def test_sym_2p2s_known_eq(eq_prob):
    """Test known equilibrium game

    Checks the structure of the generated game, that the mixture
    `[eq_prob, 1 - eq_prob]` has zero regret, and that pure mixtures are not
    equilibria unless they coincide with the requested one.
    """
    game = gamegen.sym_2p2s_known_eq(eq_prob)
    assert game.is_complete(), "didn't generate a full game"
    assert game.is_symmetric(), \
        "didn't generate a symmetric game"
    # The failure message previously claimed this was a strategy count check
    assert np.all(game.num_role_players == 2), \
        "didn't generate correct number of players"
    assert np.all(game.num_role_strats == 2), \
        "didn't generate correct number of strategies"

    eqm = np.array([eq_prob, 1 - eq_prob])
    reg = regret.mixture_regret(game, eqm)
    assert np.isclose(reg, 0), \
        "expected equilibrium wasn't an equilibrium, reg: {}".format(reg)

    for non_eqm in game.pure_mixtures():
        reg = regret.mixture_regret(game, non_eqm)
        # If eq_prob is 0 or 1, then pure is the desired mixture
        assert non_eqm[0] == eq_prob or not np.isclose(reg, 0), \
            'pure mixtures was equilibrium, {} {}'.format(non_eqm, reg)
def test_random_trace_interpolate(game0, game1):  # pylint: disable=too-many-locals
    """Test random trace interpolation

    Equilibria of a random mixture of the two games are traced to both end
    points, and interpolation is checked against held out points, both ends
    of the trace, and a random probability inside the trace.
    """
    def assert_close(actual, expected):
        """Assert two mixtures agree within the test tolerance"""
        assert np.allclose(actual, expected, rtol=1e-2, atol=2e-2)

    prob = np.random.random()
    eqa = game0.trim_mixture_support(nash.mixed_nash(
        rsgame.mix(game0, game1, prob), regret_thresh=1e-4))
    for eqm in eqa:
        start_reg = regret.mixture_regret(
            rsgame.mix(game0, game1, prob), eqm)
        if start_reg > 1e-3:
            # trimmed equilibrium had too high of regret...
            continue  # pragma: no cover

        for target in [0, 1]:
            probs, mixes = trace.trace_equilibrium(
                game0, game1, prob, eqm, target)
            if probs.size < 3:
                # not enough to test leave one out
                continue  # pragma: no cover

            # Test that interpolate recovers missing equilibria
            start, interp, end = np.sort(np.random.choice(
                probs.size, 3, replace=False))
            [interp_mix] = trace.trace_interpolate(
                game0, game1, [probs[start], probs[end]],
                [mixes[start], mixes[end]], [probs[interp]])
            assert_close(interp_mix, mixes[interp])

            # Test interp at first
            [first_mix] = trace.trace_interpolate(
                game0, game1, probs, mixes, [probs[0]])
            assert_close(first_mix, mixes[0])

            # Test interp at last
            [last_mix] = trace.trace_interpolate(
                game0, game1, probs, mixes, [probs[-1]])
            assert_close(last_mix, mixes[-1])

            # Test random t
            p_interp = np.random.uniform(probs[0], probs[-1])
            [rand_mix] = trace.trace_interpolate(
                game0, game1, probs, mixes, [p_interp])
            rand_game = rsgame.mix(game0, game1, p_interp)
            assert regret.mixture_regret(rand_game, rand_mix) <= 1.1e-3
def test_mixed_prisoners_dilemma(methods):
    """Test that mixed nash finds the prisoners dilemma equilibrium"""
    game = gamegen.prisoners_dilemma()
    eqa = nash.mixed_nash(game, dist_thresh=1e-3, processes=1, **methods)

    num_found = eqa.shape[0]
    assert num_found >= 1, \
        "didn't find at least one equilibria in pd {}".format(eqa)

    for eqm in eqa:
        assert regret.mixture_regret(game, eqm) < 1e-3, \
            "returned equilibria with high regret"

    expected = [0, 1]
    # True for every returned row that matches the expected mixture
    row_matches = np.isclose(eqa, expected, atol=1e-3, rtol=1e-3).all(1)
    assert row_matches.any(), \
        "didn't find pd equilibrium {}".format(eqa)
async def get_point(prob, eqm):
    """Get the point in a trace for an equilibrium

    Fetches the deviation games for the support of `eqm` from both async
    games, and returns a json compatible dict with the probability, the
    equilibrium, and its regret in the mixed game.
    """
    support = eqm > 0
    dev_game0 = await agame0.get_deviation_game(support)
    dev_game1 = await agame1.get_deviation_game(support)
    mixed_game = rsgame.mix(dev_game0, dev_game1, prob)
    reg = regret.mixture_regret(mixed_game, eqm)

    point = {}
    point["t"] = float(prob)
    point["equilibrium"] = sched0.mixture_to_json(eqm)
    point["regret"] = float(reg)
    return point
def min_regret_rand_mixture(game, mixtures):
    """Find the lowest regret mixture among random samples

    The search is done over a random sampling of `mixtures` mixed profiles.
    Mixtures whose regret is nan are ignored when picking the minimum.

    Arguments
    ---------
    game : Game
        The game to sample mixtures from and evaluate regret in.
    mixtures : int > 0
        Number of mixtures to evaluate the regret of.
    """
    candidates = game.random_mixtures(mixtures)
    regret_iter = (regret.mixture_regret(game, cand) for cand in candidates)
    regrets = np.fromiter(regret_iter, float, mixtures)
    best_index = np.nanargmin(regrets)
    return candidates[best_index]
def min_regret_grid_mixture(game, points):
    """Find the lowest regret mixture on a grid

    The search is done over a grid with `points` per dimensions. Mixtures
    whose regret is nan are ignored when picking the minimum.

    Arguments
    ---------
    game : Game
        The game to generate grid mixtures from and evaluate regret in.
    points : int > 1
        Number of points per dimension to search.
    """
    candidates = game.grid_mixtures(points)
    num_candidates = candidates.shape[0]
    regret_iter = (regret.mixture_regret(game, cand) for cand in candidates)
    regrets = np.fromiter(regret_iter, float, num_candidates)
    best_index = np.nanargmin(regrets)
    return candidates[best_index]
async def add_deviations(rest, mix, role_index): """Add deviations to be evaluated""" # We need the restriction here, since trimming support may increase # regret of strategies in the initial restriction data = await agame.get_deviation_game(mix > 0, role_index) devs = data.deviation_payoffs(mix) exp = np.add.reduceat(devs * mix, agame.role_starts) gains = devs - exp.repeat(agame.num_role_strats) if role_index is None: if np.all((gains <= regret_thresh) | rest): # Found equilibrium reg = gains.max() if equilibria.add(mix, reg): logging.warning( "found equilibrium %s in game %s with regret %f", agame.mixture_to_repr(mix), agame, reg, ) else: await asyncio.gather(*[ queue_restrictions(rgains, ri, rest) for ri, rgains in enumerate( np.split(gains, agame.role_starts[1:])) ]) else: # Set role index rgains = np.split(gains, agame.role_starts[1:])[role_index] rrest = np.split(rest, agame.role_starts[1:])[role_index] if np.all((rgains <= regret_thresh) | rrest): # No deviations role_index += 1 if role_index < agame.num_roles: # Explore next deviation await add_deviations(rest, mix, role_index) else: # found equilibrium # This should not require scheduling as to get here all # deviations have to be scheduled data = await agame.get_deviation_game(mix > 0) reg = regret.mixture_regret(data, mix) if equilibria.add(mix, reg): logging.warning( "found equilibrium %s in game %s with regret %f", agame.mixture_to_repr(mix), agame, reg, ) else: await queue_restrictions(rgains, role_index, rest)
def process_game(args):  # pylint: disable=too-many-locals
    """Compute information about a game

    Every method from `gen_methods` is run on every starting mixture from
    `gen_profiles`, recording how long each run took and which distinct
    equilibrium, if any, it found.

    Parameters
    ----------
    args : (int, (str, game))
        The index of the game, used as the random seed, and a tuple of the
        game's name and the game itself.

    Returns
    -------
    name : str
        The name of the game that was passed in.
    meth_times : {method: {profile: [float]}}
        Seconds taken by each run of each method for each profile type.
    meth_eqa : {method: {profile: [int]}}
        Indices of the distinct equilibria found by each method for each
        profile type.
    """
    i, (name, game) = args
    np.random.seed(i)  # Reproducible randomness
    profiles = gen_profiles(game)
    reg_thresh = 1e-2  # FIXME
    conv_thresh = 1e-2 * np.sqrt(2 * game.num_roles)  # FIXME
    all_eqa = collect.mcces(conv_thresh)

    meth_times = {}
    meth_eqa = {}
    for method, single, func in gen_methods():
        logging.warning('Starting %s for %s %d', method, name, i)
        prof_times = {}
        prof_eqa = {}
        for prof, mix_gen in profiles.items():
            if prof != 'uniform' and single:
                # Single methods are only run for the uniform profile
                continue
            times = []
            eqa = []
            for mix in mix_gen():
                start = time.time()
                eqm = func(game, mix)
                speed = time.time() - start
                reg = regret.mixture_regret(game, eqm)
                times.append(speed)
                if reg < reg_thresh:
                    all_eqa.add(eqm, reg)
                    eqa.append(eqm)
            prof_times[prof] = times
            prof_eqa[prof] = eqa
        meth_times[method] = prof_times
        meth_eqa[method] = prof_eqa

        # Report the total solve time of the method. Reporting the time of
        # the last run alone was misleading, and failed when nothing was run.
        elapsed = sum(sum(times) for times in prof_times.values())
        logging.warning('Finished %s for %s %d - took %f seconds',
                        method, name, i, elapsed)

    # Number the distinct equilibria in iteration order
    inds = {}
    for norm, _ in all_eqa:
        inds[norm] = len(inds)

    # Replace each found equilibrium with the index of its representative
    for prof_eqa in meth_eqa.values():
        for prof, eqa in prof_eqa.items():
            prof_eqa[prof] = list({inds[all_eqa.get(e)] for e in eqa})

    return name, meth_times, meth_eqa
def process(i, eqm):
    """Process an equilibrium

    Adds the mixture to `equilibria` when its regret is below
    `regret_thresh`, and keeps `best` as the smallest `[regret, i, eqm]`
    seen so far.
    """
    eqm_regret = regret.mixture_regret(game, eqm)
    candidate = [eqm_regret, i, eqm]
    if eqm_regret < regret_thresh:
        equilibria.add(eqm, eqm_regret)
    best[:] = min(best, candidate)
def test_multiplicative_weights_failure():
    """Test that multiplicative weights ends at a high regret mixture"""
    game = gamegen.rock_paper_scissors(win=0.5)
    start = [0.6, 0.3, 0.1]
    result = nash.multiplicative_weights_dist(game, start)
    final_regret = regret.mixture_regret(game, result)
    assert final_regret > 0.1
def mixed_equilibria(  # pylint: disable=too-many-locals
        game, style='best', *, regret_thresh=1e-2, dist_thresh=0.1,
        processes=None):
    """Compute mixed equilibria

    Parameters
    ----------
    game : RsGame
        Game to compute equilibria of.
    style : str, optional
        The style of equilibria finding to run. Available styles are:

        fast   - run minimal algorithms and return nothing on failure
        more   - run minimal and if nothing run other reasonable algorithms
        best   - run extra and if nothing run exponential with timeout
        one    - run extra and if nothing run exponential
        <any>* - if nothing found, return minimum regret
    regret_thresh : float, optional
        Regret below which a mixture counts as an equilibrium.
    dist_thresh : float, optional
        Minimum role norm for equilibria to be considered distinct. [0, 1]
    processes : int, optional
        Number of processes to compute equilibria with. If None, all
        available processes will be used.

    Returns
    -------
    eqa : ndarray
        The distinct equilibria found stacked into one array. If none were
        found, this is the single lowest regret mixture when `style` ends
        with `*`, and an empty array with `game.num_strats` columns
        otherwise.
    """
    utils.check(style in _STYLES, 'style {} not one of {}', style, _STYLES_STR)
    utils.check(processes is None or processes > 0,
                'processes must be positive or None')
    # TODO Is there a better interface for checking dev payoffs
    utils.check(
        not np.isnan(game.deviation_payoffs(game.uniform_mixture())).any(),
        'Nash finding only works on game with full deviation data')

    seq = 0  # number of results processed, also breaks ties for `best`
    req = 0  # required methods handed to the pool but not yet finished
    best = [np.inf, 0, None]
    equilibria = collect.mcces(dist_thresh * np.sqrt(2 * game.num_roles))
    func = functools.partial(_serial_nash_func, game)
    # Iterable of additional methods for the style, ignoring any trailing *
    extra = {
        'fast': lambda _, __: (),
        'more': _more,
        'best': _best,
        'one': _one,
    }[style.rstrip('*')](game, regret_thresh)

    def process_req(tup):
        """Count required methods"""
        nonlocal req
        req += 1
        return tup + (True, )

    with multiprocessing.Pool(processes) as pool:
        # Required methods are tagged True and extra methods False. The tag
        # comes back as `preq`, so `req` only reaches zero again once every
        # required method pulled so far has returned.
        for preq, eqm in pool.imap_unordered(
                func, itertools.chain(map(process_req, _required(game)),
                                      (tup + (False, ) for tup in extra))):
            seq += 1
            req -= preq
            reg = regret.mixture_regret(game, eqm)
            best[:] = min(best, [reg, seq, eqm[None]])
            if reg < regret_thresh:
                equilibria.add(eqm, reg)
            # Stop early once no required method is outstanding and at
            # least one equilibrium was found
            if not req and equilibria:
                return np.stack([e for e, _ in equilibria])
    assert not req
    return best[-1] if style.endswith('*') else np.empty((0, game.num_strats))
def test_minreg_rand_roshambo():
    """Test minimum regret random search on rock paper scissors"""
    game = gamegen.rock_paper_scissors()
    found = nash.min_regret_rand_mixture(game, 20)
    max_regret = 2 + 1e-7
    assert regret.mixture_regret(game, found) < max_regret, \
        "Found a mixture with greater than maximum regret"
def quiesce(sim, game, serial, base_name, configuration={}, dpr=None,
            log=logging, profiles=(), all_devs=True, max_profiles=500,
            max_subgame_size=3, sleep_time=300, required_equilibria=1,
            regret_thresh=1e-3, reschedule_limit=10, process_memory=4096,
            observation_time=600, observation_increment=1, nodes=1):
    """Quiesce a game

    Creates a generic scheduler, and then alternates between finding
    equilibria in fully sampled subgames and scheduling the deviations from
    those equilibria, until nothing is left to schedule.

    Parameters
    ----------
    sim : simulator
        Object used to create the generic scheduler.
    game : game
        Game describing the roles, players and strategies to quiesce.
    serial : serializer
        Converts between arrays and the json representation for `game`.
    base_name : str
        Prefix of the name of the created scheduler.
    configuration : dict, optional
        Configuration passed through to the created scheduler.
    dpr : optional
        If None no reduction is used, otherwise this is passed to the
        deviation preserving reduction.
    log : logger, optional
        Where progress is logged.
    profiles : optional
        Passed to the profile scheduler.
    all_devs : bool, optional
        If True, deviations of all roles from a candidate equilibrium are
        scheduled at once, otherwise they are scheduled one role at a time.
    max_profiles : int, optional
        Passed to the profile scheduler.
    max_subgame_size : int, optional
        Best response subgames of at least this size go into the backup
        queue instead of being scheduled immediately.
    sleep_time : int, optional
        Seconds to sleep when no scheduling has finished.
    required_equilibria : int, optional
        Backup subgames are explored until this many equilibria are
        confirmed.
    regret_thresh : float, optional
        Gains below this are not considered beneficial deviations.
    reschedule_limit : int, optional
        How many multiples of `observation_increment` a subgame without
        equilibria is resampled to before giving up on it.
    process_memory, observation_time, observation_increment, nodes
        Settings of the created scheduler. `observation_increment` is also
        the number of observations requested when scheduling.

    Returns
    -------
    equilibria : ndarray
        The confirmed equilibria, one per row.
    complete_subgames : ndarray
        The maximal explored subgames.
    final_game : game
        The game with all of the data gathered by the profile scheduler.
    """
    # Create scheduler
    sched = sim.create_generic_scheduler(
        name='{base}_generic_quiesce_{random}'.format(
            base=base_name, random=utils.random_string(6)),
        active=1,
        process_memory=process_memory,
        size=game.num_players.sum(),
        time_per_observation=observation_time,
        observations_per_simulation=observation_increment,
        nodes=nodes,
        default_observation_requirement=observation_increment,
        configuration=configuration)

    # Add roles and counts to scheduler
    for role, count in zip(serial.role_names, game.num_players):
        sched.add_role(role, count)

    log.info('Created scheduler %d '
             '(http://egtaonline.eecs.umich.edu/generic_schedulers/%d)',
             sched.id, sched.id)

    # Data lookup
    psched = profsched.ProfileScheduler(
        game, serial, sched, max_profiles, log, profiles)

    # Set up reduction
    if dpr is None:
        red = reduction.Identity(game.num_strategies, game.num_players)
    else:
        red = reduction.DeviationPreserving(game.num_strategies,
                                            game.num_players, dpr)

    # Set up main scheduler abstraction
    qsched = profsched.QuiesceScheduler(game, red, psched)

    confirmed_equilibria = []  # Confirmed equilibra
    explored_subgames = []  # Already explored subgames
    explored_mixtures = []  # Already explored mixtures
    backup = []  # Extra subgames to explore
    subgames = []  # Subgames that are scheduling
    deviations = []  # Deviations that are scheduling

    # Define useful functions
    def add_subgame(subm):
        """Adds a subgame to the scheduler"""
        if not any(np.all(subm <= s) for s in explored_subgames):
            # Unexplored, so drop explored subgames this one contains
            explored_subgames[:] = [s for s in explored_subgames
                                    if np.any(s > subm)]
            explored_subgames.append(subm)
            log.debug('Exploring subgame:\n%s\n', json.dumps(
                {r: list(s) for r, s
                 in serial.to_prof_json(subm).items()}, indent=2))
            subgames.append(
                qsched.schedule_subgame(subm, observation_increment))
        else:  # Subgame already explored
            log.debug('Subgame already explored:\n%s\n', json.dumps(
                {r: list(s) for r, s
                 in serial.to_prof_json(subm).items()}, indent=2))

    def add_mixture(mixture, role_index=None):
        """Adds the given mixture to the scheduler"""
        if any(linalg.norm(mix - mixture) < 1e-3 and (
                role_index is None or role_index <= ri)
               for mix, ri in explored_mixtures):
            if role_index is None:
                log.debug('Mixture already explored:\n%s\n', json.dumps(
                    serial.to_prof_json(mixture), indent=2))
            else:
                log.debug('Mixture already explored for role "%s":\n%s\n',
                          serial.role_names[role_index],
                          json.dumps(serial.to_prof_json(mixture), indent=2))
        else:
            explored_mixtures.append((mixture, role_index))
            if role_index is None:
                log.debug('Exploring equilibrium deviations:\n%s\n',
                          json.dumps(serial.to_prof_json(mixture), indent=2))
            else:
                log.debug(
                    'Exploring equilibrium deviations for role "%s":\n%s\n',
                    serial.role_names[role_index],
                    json.dumps(serial.to_prof_json(mixture), indent=2))
            dev = qsched.schedule_deviations(
                mixture > 0, observation_increment, role_index)
            deviations.append((mixture, dev))

    def analyze_subgame(unsched_subgames, sub):
        """Process a subgame"""
        if sub.is_complete():
            subg = sub.get_subgame()
            sub_eqa = nash.mixed_nash(subg, regret_thresh=regret_thresh)
            eqa = subgame.translate(subg.trim_mixture_support(sub_eqa),
                                    sub.subgame_mask)
            if eqa.size == 0:  # No equilibria
                if sub.counts < reschedule_limit * observation_increment:
                    log.info(
                        'Found no equilibria in subgame:\n%s\n',
                        json.dumps(
                            {r: list(s) for r, s in
                             serial.to_prof_json(sub.subgame_mask).items()},
                            indent=2))
                    sub.update_counts(sub.counts + observation_increment)
                    unsched_subgames.append(sub)
                else:
                    # Report the mask of this subgame; the free variable
                    # `subm` used here before referred to an unrelated
                    # subgame from the enclosing scope
                    log.error(
                        'Failed to find equilibria in subgame:\n%s\n',
                        json.dumps(
                            {r: list(s) for r, s in
                             serial.to_prof_json(sub.subgame_mask).items()},
                            indent=2))
            else:
                log.debug(
                    'Found candidate equilibria:\n%s\nin subgame:\n%s\n',
                    json.dumps(list(map(serial.to_prof_json, eqa)), indent=2),
                    json.dumps(
                        {r: list(s) for r, s in
                         serial.to_prof_json(sub.subgame_mask).items()},
                        indent=2))
                if all_devs:
                    for eqm in eqa:
                        add_mixture(eqm)
                else:
                    for eqm in eqa:
                        add_mixture(eqm, 0)
        else:
            unsched_subgames.append(sub)

    if all_devs:
        def analyze_deviations(unsched_deviations, mix, dev):
            """Analyzes responses to an equilibrium and book keeps"""
            if dev.is_complete():
                dev_game = dev.get_game()
                responses = regret.mixture_deviation_gains(
                    dev_game, mix, assume_complete=True)
                log.debug('Responses:\n%s\nto candidate equilibrium:\n%s\n',
                          json.dumps(serial.to_prof_json(
                              responses, filter_zeros=False), indent=2),
                          json.dumps(serial.to_prof_json(mix), indent=2))

                if np.all(responses < regret_thresh):
                    # found equilibria
                    if not any(linalg.norm(m - mix) < 1e-3
                               for m in confirmed_equilibria):
                        confirmed_equilibria.append(mix)
                        log.info('Confirmed equilibrium:\n%s\n', json.dumps(
                            serial.to_prof_json(mix), indent=2))
                else:  # Queue up next subgames
                    subsize = dev.subgame_mask.sum()
                    # TODO Normalize role deviations
                    for rstart, role_resps in zip(
                            game.role_starts, game.role_split(responses)):
                        # Partition so the best response is last in order
                        order = np.argpartition(
                            role_resps, role_resps.size - 1)
                        gain = role_resps[order[-1]]
                        if gain > 0:
                            # Positive best response exists for role
                            subm = dev.subgame_mask.copy()
                            subm[order[-1] + rstart] = True
                            if subsize < max_subgame_size:
                                add_subgame(subm)
                            else:
                                # id breaks ties between equal priorities,
                                # so the heap never compares the arrays
                                heapq.heappush(backup, (
                                    (False, False, subsize, -gain, id(subm)),
                                    subm))
                            order = order[:-1]

                        # Priority for backup is (not best response, not
                        # beneficial response, subgame size, deviation loss).
                        # Thus, best responses are first, then positive
                        # responses, then small subgames, then highest gain.

                        # Add the rest to the backup
                        for ind in order:
                            subm = dev.subgame_mask.copy()
                            subm[ind + rstart] = True
                            gain = role_resps[ind]
                            heapq.heappush(backup, (
                                (True, gain < 0, subsize, -gain, id(subm)),
                                subm))
            else:
                unsched_deviations.append((mix, dev))

    else:
        def analyze_deviations(unsched_deviations, mix, dev):
            """Analyzes responses to an equilibrium and book keeps"""
            if dev.is_complete():
                dev_game = dev.get_game()
                role_resps = game.role_split(regret.mixture_deviation_gains(
                    dev_game, mix, assume_complete=True))[dev.role_index]
                log.debug(
                    '"%s" Responses:\n%s\nto candidate equilibrium:\n%s\n',
                    serial.role_names[dev.role_index],
                    json.dumps(dict(zip(serial.strat_names[dev.role_index],
                                        role_resps)), indent=2),
                    json.dumps(serial.to_prof_json(mix), indent=2))

                if np.all(role_resps < regret_thresh):
                    # role has no deviations
                    if dev.role_index == game.num_roles - 1:
                        if not any(linalg.norm(m - mix) < 1e-3
                                   for m in confirmed_equilibria):
                            confirmed_equilibria.append(mix)
                            log.info('Confirmed equilibrium:\n%s\n',
                                     json.dumps(serial.to_prof_json(mix),
                                                indent=2))
                    else:
                        add_mixture(mix, dev.role_index + 1)
                else:  # Queue up next subgames
                    subsize = dev.subgame_mask.sum()
                    # TODO Normalize role deviations
                    rstart = game.role_starts[dev.role_index]
                    # Partition so the best response is last in order
                    order = np.argpartition(role_resps, role_resps.size - 1)
                    gain = role_resps[order[-1]]

                    # Positive best response exists for role
                    subm = dev.subgame_mask.copy()
                    subm[order[-1] + rstart] = True
                    if subsize < max_subgame_size:
                        add_subgame(subm)
                    else:
                        # id breaks ties between equal priorities, so the
                        # heap never compares the arrays
                        heapq.heappush(backup, (
                            (False, False, subsize, -gain, id(subm)), subm))

                    # Priority for backup is (not best response, not beneficial
                    # response, subgame size, deviation loss). Thus, best
                    # responses are first, then positive responses, then small
                    # subgames, then highest gain.

                    # Add the rest to the backup
                    for ind in order[:-1]:
                        subm = dev.subgame_mask.copy()
                        subm[ind + rstart] = True
                        gain = role_resps[ind]
                        heapq.heappush(backup, (
                            (True, gain < 0, subsize, -gain, id(subm)),
                            subm))
            else:
                unsched_deviations.append((mix, dev))

    try:
        # Initialize with pure subgames
        for subm in subgame.pure_subgames(game):
            add_subgame(subm)

        # While still scheduling left to do
        while subgames or deviations:
            if (qsched.update() or
                    any(s.is_complete() for s in subgames) or
                    any(d.is_complete() for _, d in deviations)):
                # Something finished scheduling
                unsched_subgames = []
                for sub in subgames:
                    analyze_subgame(unsched_subgames, sub)
                subgames = unsched_subgames

                unsched_deviations = []
                for mix, dev in deviations:
                    analyze_deviations(unsched_deviations, mix, dev)
                deviations = unsched_deviations

                if (not subgames and not deviations and
                        len(confirmed_equilibria) < required_equilibria):
                    # We've finished all the required stuff, but still haven't
                    # found an equilibrium, so pop a backup off
                    log.debug('Extracting backup game\n')
                    while backup and not subgames:
                        add_subgame(heapq.heappop(backup)[1])

            else:
                # We're still waiting for jobs to complete, so take a break
                log.debug('Waiting %d seconds for simulations to finish...\n',
                          sleep_time)
                time.sleep(sleep_time)

    except KeyboardInterrupt:
        # Manually killed, so just deactivate
        log.error('Manually killed quiesce script. Deactivating scheduler\n')
        sched.deactivate()
        sys.exit(1)

    log.info('Deactivating scheduler %d\n', sched.id)
    sched.deactivate()

    final_game = psched.get_game()
    red_game = red.reduce_game(final_game, True)
    equilibria = (np.array(confirmed_equilibria) if confirmed_equilibria
                  else np.empty((0, game.num_role_strats)))
    complete_subgames = np.array(explored_subgames)
    regrets = np.fromiter((regret.mixture_regret(red_game, eqm)
                           for eqm in confirmed_equilibria),
                          float, len(confirmed_equilibria))
    final_log = [dict(regret=float(r), equilibrium=serial.to_prof_json(eqm))
                 for eqm, r in zip(equilibria, regrets)]
    log.error('Finished quiescing\nConfirmed equilibria:\n%s\n'
              'Explored %d subgames, sampled %d profiles with %d distinct\n',
              json.dumps(final_log, indent=2), complete_subgames.shape[0],
              psched.num_profiles, psched.num_unique_profiles)
    # TODO return failed subgames
    return equilibria, complete_subgames, final_game
def main(args):
    """Build a point GP game from the input game and write a text report

    Reads a game as json from `args.input`, wraps it in a
    `gpgame.PointGPGame`, searches the wrapped game for mixed nash
    equilibria, and then writes the game, the maximum social welfare
    profiles, each equilibrium with its regret, and a closing json summary
    to `args.output`.

    Parameters
    ----------
    args : namespace
        Must provide `input`, `output`, `max_iters`, `converge_thresh`,
        `regret_thresh`, `dist_thresh`, `processes` and `supp_thresh`.
    """
    game, serial = gameio.read_game(json.load(args.input))
    learned = gpgame.PointGPGame(game)

    # Options handed to the replicator method of the equilibrium search
    replicator_opts = {
        'max_iters': args.max_iters,
        'converge_thresh': args.converge_thresh,
    }
    candidates = nash.mixed_nash(
        learned, regret_thresh=args.regret_thresh,
        dist_thresh=args.dist_thresh, processes=args.processes,
        at_least_one=True, replicator=replicator_opts)
    trimmed = game.trim_mixture_support(candidates, args.supp_thresh)

    # Pair every trimmed mixture with its regret in the learned game
    equilibria = []
    for mixture in trimmed:
        equilibria.append((mixture, regret.mixture_regret(learned, mixture)))

    write = args.output.write

    # Game section
    write('Game Learning\n')
    write('=============\n')
    write(game.to_str(serial))
    write('\n\n')

    # Social welfare section
    write('Social Welfare\n')
    write('--------------\n')
    welfare, profile = game.get_max_social_welfare()
    if profile is not None:
        write('\nMaximum social welfare profile:\n')
        write(serial.to_prof_printstring(profile))
        write('Welfare: {:.4f}\n\n'.format(welfare))

        if game.num_roles > 1:
            by_role = zip(
                serial.role_names, *game.get_max_social_welfare(True))
            for role, role_welfare, role_profile in by_role:
                write('Maximum "{}" welfare profile:\n'.format(role))
                write(serial.to_prof_printstring(role_profile))
                write('Welfare: {:.4f}\n\n'.format(role_welfare))
    else:
        write('There was no profile with complete payoff data\n\n')
    write('\n')

    # Equilibria section
    write('Equilibria\n')
    write('----------\n')
    if not equilibria:
        write('Found no equilibria\n\n')
    else:
        count = len(equilibria)
        ending = 'um' if count == 1 else 'a'
        write('Found {:d} equilibri{}\n\n'.format(count, ending))
        for number, (mixture, mix_regret) in enumerate(equilibria, 1):
            write('Equilibrium {:d}:\n'.format(number))
            write(serial.to_prof_printstring(mixture))
            write('Regret: {:.4f}\n\n'.format(mix_regret))
    write('\n')

    # Machine readable summary
    write('Json Data\n')
    write('=========\n')
    summary = {
        'equilibria': [
            serial.to_prof_json(mixture) for mixture, _ in equilibria],
    }
    json.dump(summary, args.output)
    write('\n')
def test_faststar_failure(hardgame):
    """Check that the 'fast*' style only yields a high regret fallback"""
    found = nash.mixed_equilibria(hardgame, 'fast*', processes=1)
    # Exactly one mixture over nine strategies is expected back
    assert found.shape == (1, 9)
    fallback = found[0]
    fallback_regret = regret.mixture_regret(hardgame, fallback)
    assert fallback_regret > 1e-2
async def get_regret(eqm):
    """Return the regret of `eqm` in the deviation game of its support"""
    support = eqm > 0
    dev_game = await agame.get_deviation_game(support)
    eqm_regret = regret.mixture_regret(dev_game, eqm)
    return float(eqm_regret)
def test_fictitious_play_failure():
    """Check that fictitious play ends with regret above 0.1"""
    rps = gamegen.rock_paper_scissors(win=0.5)
    start = [0.6, 0.3, 0.1]
    result = nash.fictitious_play(  # pylint: disable=unexpected-keyword-arg
        rps, start, max_iters=10000)
    final_regret = regret.mixture_regret(rps, result)
    assert final_regret > 0.1
def below_regret_thresh(prob, mix_neg):
    """Event value: regret of the trimmed mixture minus the threshold"""
    trimmed = egame.trim_mixture_support(mix_neg, thresh=0)
    mixed_game = rsgame.mix(game0, game1, prob)
    excess = regret.mixture_regret(mixed_game, trimmed) - regret_thresh
    return excess
def calc_reg(game, prof):
    """Return the regret of `prof` in `game` as a python scalar

    Pure profiles are cast to an integer array and scored with
    `regret.pure_strategy_regret`; anything else is scored as a mixture.
    """
    if not is_pure_profile(game, prof):
        return regret.mixture_regret(game, prof).item()
    counts = np.asarray(prof, int)
    return regret.pure_strategy_regret(game, counts).item()
def mixed_equilibria(  # pylint: disable=too-many-locals
        game, style='best', *, regret_thresh=1e-2, dist_thresh=0.1,
        processes=None):
    """Compute mixed equilibria

    Runs a set of required methods, plus extra methods selected by `style`,
    in a process pool and collects every returned mixture whose regret is
    below `regret_thresh`.

    Parameters
    ----------
    game : RsGame
        Game to compute equilibria of.
    style : str, optional
        The style of equilibria finding to run. Available styles are:

        fast   - run minimal algorithms and return nothing on failure
        more   - run minimal and if nothing run other reasonable algorithms
        best   - run extra and if nothing run exponential with timeout
        one    - run extra and if nothing run exponential
        <any>* - if nothing found, return minimum regret
    regret_thresh : float, optional
        Regret threshold. Only mixtures with regret strictly below this value
        are collected as equilibria.
    dist_thresh : float, optional
        Minimum role norm for equilibria to be considered distinct. [0, 1]
    processes : int, optional
        Number of processes to compute equilibria with. If None, all available
        processes will be used.

    Returns
    -------
    eqa : ndarray
        The collected equilibria stacked along the first axis. If none were
        collected, the lowest regret mixture seen (with a leading axis of
        length one) when `style` ends with '*', otherwise an empty array of
        shape (0, game.num_strats).
    """
    utils.check(style in _STYLES, 'style {} not one of {}', style, _STYLES_STR)
    utils.check(
        processes is None or processes > 0,
        'processes must be positive or None')
    # TODO Is there a better interface for checking dev payoffs
    utils.check(
        not np.isnan(game.deviation_payoffs(game.uniform_mixture())).any(),
        'Nash finding only works on game with full deviation data')

    # seq counts results received so far and doubles as the tie breaker in
    # `best`; req counts required methods handed out but not yet returned
    seq = 0
    req = 0
    # [regret, sequence number, mixture] of the lowest regret result so far
    best = [np.inf, 0, None]

    equilibria = collect.mcces(dist_thresh * np.sqrt(2 * game.num_roles))
    func = functools.partial(_serial_nash_func, game)
    # Any trailing '*' is stripped before looking up the extra methods; the
    # selected callable is invoked with the game and regret threshold
    extra = {
        'fast': lambda _, __: (),
        'more': _more,
        'best': _best,
        'one': _one,
    }[style.rstrip('*')](game, regret_thresh)

    def process_req(tup):
        """Count required methods"""
        nonlocal req
        req += 1
        return tup + (True,)

    with multiprocessing.Pool(processes) as pool:
        # Required methods are tagged True and extra methods False
        for preq, eqm in pool.imap_unordered(func, itertools.chain(
                map(process_req, _required(game)),
                (tup + (False,) for tup in extra))):
            seq += 1
            # NOTE(review): preq is presumably the required flag echoed back
            # by `_serial_nash_func` - confirm against that helper
            req -= preq
            reg = regret.mixture_regret(game, eqm)
            # Lists compare element by element, and seq is distinct for every
            # result, so the comparison never reaches the array element.
            # eqm[None] adds a leading axis of length one.
            best[:] = min(best, [reg, seq, eqm[None]])
            if reg < regret_thresh:
                equilibria.add(eqm, reg)
            # Return early once no counted required method is outstanding
            # and `equilibria` is truthy
            if not req and equilibria:
                return np.stack([e for e, _ in equilibria])
    assert not req
    return best[-1] if style.endswith('*') else np.empty((0, game.num_strats))
def trace_equilibrium(  # pylint: disable=too-many-locals
        game0, game1, peq, eqm, target, *, regret_thresh=1e-3, max_step=0.1,
        singular=1e-7, **ivp_args):
    """Try to trace an equilibrium out to target

    Takes two games, a fraction that they're mixed (`peq`), and an equilibrium
    of the mixed game (`eqm`). It then attempts to find the equilibrium at the
    `target` mixture. It may not reach target, but will return as far as it
    got. The return value is two parallel arrays for the probabilities with
    known equilibria and the equilibria.

    Parameters
    ----------
    game0 : RsGame
        The first game that's merged. Represents the payoffs when `peq` is 0.
    game1 : RsGame
        The second game that's merged. Represents the payoffs when `peq` is 1.
    peq : float
        The amount that the two games are merged such that `eqm` is an
        equilibrium. Must be in [0, 1].
    eqm : ndarray
        An equilibrium when `game0` and `game1` are merged a `peq` fraction.
    target : float
        The desired mixture probability to have an equilibrium at.
    regret_thresh : float, optional
        The amount of gain from deviating to a strategy outside support can
        have before it's considered a beneficial deviation and the tracing
        stops. This should be larger than zero as most equilibria are
        approximate due to floating point precision.
    max_step : float, optional
        The maximum step to take in t when evaluating.
    singular : float, optional
        An absolute determinant below this value is considered singular.
        Occasionally the derivative doesn't exist, and this is one way in which
        that manifests. This value regulates when ODE solving terminates due to
        a singular matrix.
    ivp_args
        Any remaining keyword arguments are passed to the ivp solver.

    Returns
    -------
    probs : ndarray
        The `t` values of the `solve_ivp` result, i.e. the mixture
        probabilities at which a solution was produced.
    eqa : ndarray
        The solution at each of those probabilities, one per row, after
        `trim_mixture_support` with a zero threshold.
    """
    egame = rsgame.empty_copy(game0)
    eqm = np.asarray(eqm, float)
    utils.check(egame.is_mixture(eqm), "equilibrium wasn't a valid mixture")
    # The starting mixture may exceed the threshold by a small 1e-7 slack
    utils.check(
        regret.mixture_regret(rsgame.mix(game0, game1, peq), eqm) <=
        regret_thresh + 1e-7, "equilibrium didn't have regret below threshold")
    # max_step is forwarded to the solver with the remaining options
    ivp_args.update(max_step=max_step)

    # It may be handy to have the derivative of this so that the ode solver can
    # be more efficient, except that computing the derivative w.r.t. t requires
    # the hessian of the deviation payoffs, which would be complicated and so
    # far has no use anywhere else.

    def ode(prob, mix_neg):
        """ODE function for solve_ivp"""
        div = np.zeros(egame.num_strats)
        mix = egame.trim_mixture_support(mix_neg, thresh=0)
        supp = mix > 0
        rgame = egame.restrict(supp)

        dev1, jac1 = game0.deviation_payoffs(mix, jacobian=True)
        dev2, jac2 = game1.deviation_payoffs(mix, jacobian=True)

        # Everything below is restricted to strategies in support
        gvals = (dev1 - dev2)[supp]
        fvecs = ((1 - prob) * jac1 + prob * jac2)[supp][:, supp]

        # Successive differences, with the entries at
        # `rgame.role_starts[1:] - 1` removed, followed by one extra row per
        # role
        gvec = np.concatenate([
            np.delete(np.diff(gvals), rgame.role_starts[1:] - 1),
            np.zeros(egame.num_roles)
        ])
        fmat = np.concatenate([
            np.delete(np.diff(fvecs, 1, 0), rgame.role_starts[1:] - 1, 0),
            np.eye(egame.num_roles).repeat(rgame.num_role_strats, 1)
        ])
        # The derivative is left at zero when the matrix counts as singular
        if singular < np.abs(np.linalg.det(fmat)):
            div[supp] = np.linalg.solve(fmat, gvec)
        return div

    def below_regret_thresh(prob, mix_neg):
        """Event for regret going above threshold"""
        mix = egame.trim_mixture_support(mix_neg, thresh=0)
        reg = regret.mixture_regret(rsgame.mix(game0, game1, prob), mix)
        return reg - regret_thresh

    # Terminal event that only fires on a negative to positive crossing, i.e.
    # when regret rises through the threshold
    below_regret_thresh.terminal = True
    below_regret_thresh.direction = 1

    def singular_jacobian(prob, mix_neg):
        """Event for when jacobian is singular"""
        mix = egame.trim_mixture_support(mix_neg, thresh=0)
        supp = mix > 0
        rgame = egame.restrict(supp)
        _, jac1 = game0.deviation_payoffs(mix, jacobian=True)
        _, jac2 = game1.deviation_payoffs(mix, jacobian=True)
        fvecs = ((1 - prob) * jac1 + prob * jac2)[supp][:, supp]
        # Same matrix that `ode` builds and solves against
        fmat = np.concatenate([
            np.delete(np.diff(fvecs, 1, 0), rgame.role_starts[1:] - 1, 0),
            np.eye(egame.num_roles).repeat(rgame.num_role_strats, 1)
        ])
        return np.abs(np.linalg.det(fmat)) - singular

    # Terminal event that only fires on a positive to negative crossing, i.e.
    # when the absolute determinant drops below `singular`
    singular_jacobian.terminal = True
    singular_jacobian.direction = -1

    events = [below_regret_thresh, singular_jacobian]

    # This is to scope the index
    def create_support_loss(ind):
        """Create support loss for every ind"""

        def support_loss(_, mix):
            """Support loss event"""
            return mix[ind]

        # Only `direction` is set here; no `terminal` attribute is assigned
        support_loss.direction = -1
        return support_loss

    for strat in range(egame.num_strats):
        events.append(create_support_loss(strat))

    # Divide warnings raised by numpy while integrating are silenced
    with np.errstate(divide='ignore'):
        res = integrate.solve_ivp(ode, [peq, target], eqm, events=events,
                                  **ivp_args)
    return res.t, egame.trim_mixture_support(res.y.T, thresh=0)
def reg(game, serial, prof):
    """Return the regret of `prof` in `game` as a python scalar

    Pure profiles are scored with `regret.pure_strategy_regret` and all other
    profiles with `regret.mixture_regret`. `serial` is accepted but not used.
    """
    regret_func = (
        regret.pure_strategy_regret if is_pure_profile(game, prof)
        else regret.mixture_regret)
    return regret_func(game, prof).item()