def test_WSLS():
    sigmund = ts.agent.WSLS()
    sigmund.choice = 0  # Manually setting choice
    penny = ts.PayoffMatrix(name="penny_competitive")
    assert 0 == sigmund.compete(op_choice=1, p_matrix=penny, agent=0)
    sigmund.choice = 1  # Manually setting choice
    assert 1 == sigmund.compete(op_choice=0, p_matrix=penny, agent=0)
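# A complementary sketch, not in the original suite: the lose-switch case. It
# assumes the penny_competitive convention implied by the asserts above, where
# agent 0 wins when the choices differ, so matching choices are a loss.
def test_WSLS_switches_on_loss():
    sigmund = ts.agent.WSLS()
    sigmund.choice = 0  # Manually setting choice
    penny = ts.PayoffMatrix(name="penny_competitive")
    # After a loss (both agents chose 0), a win-stay, lose-switch agent should switch to 1
    assert 1 == sigmund.compete(op_choice=0, p_matrix=penny, agent=0)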
def test_tutorial():
    # Get the competitive penny game payoff matrix
    penny = ts.PayoffMatrix("penny_competitive")

    tom_1 = ts.TOM(level=1)
    init_states = tom_1.get_internal_states()
    init_states["own_states"]["p_k"] = [0.3, 0.7]
    tom_1.set_internal_states(init_states)

    # Print the changed states
    tom_1.print_internal()
def test_learning_function():
    penny = ts.PayoffMatrix(name="penny_competitive")
    prev_internal_states = {
        "opponent_states": {},
        "own_states": {"p_op_mean0": 0, "p_op_var0": 0},
    }
    params = {"volatility": -2, "b_temp": -1}
    outcome = learning_function(
        prev_internal_states,
        params,
        self_choice=1,
        op_choice=1,
        level=0,
        agent=0,
        p_matrix=penny,
    )
    assert abs(outcome["own_states"]["p_op_mean0"] - 0.44216598162254866) < 0.01
    assert abs(outcome["own_states"]["p_op_var0"] + 0.12292276280308079) < 0.01
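# A hedged follow-up sketch (not in the original test): the returned states
# should be usable as the previous states of the next round, so updates can be
# chained. The key layout is an assumption carried over from the call above.
def test_learning_function_chains():
    penny = ts.PayoffMatrix(name="penny_competitive")
    states = {
        "opponent_states": {},
        "own_states": {"p_op_mean0": 0, "p_op_var0": 0},
    }
    params = {"volatility": -2, "b_temp": -1}
    for _ in range(3):  # run three consecutive updates
        states = learning_function(
            states,
            params,
            self_choice=1,
            op_choice=1,
            level=0,
            agent=0,
            p_matrix=penny,
        )
    # the 0-ToM estimate keys should survive every update
    assert set(states["own_states"]) >= {"p_op_mean0", "p_op_var0"}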
def test_PayoffMatrix():
    staghunt = ts.PayoffMatrix(name="staghunt")
    assert staghunt.payoff(choice_agent0=1, choice_agent1=1, agent=0) == 5
    assert staghunt.payoff(choice_agent0=1, choice_agent1=0, agent=0) == 0
    assert staghunt.payoff(choice_agent0=0, choice_agent1=1, agent=0) == 3
    chicken = ts.PayoffMatrix(name="chicken")
    assert chicken.payoff(0, 1, 0) == -1
    dead = ts.PayoffMatrix(name="deadlock")
    assert dead.payoff(1, 0, 1) == 0
    sexes = ts.PayoffMatrix(name="sexes")
    assert sexes.payoff(1, 1, 0) == 5
    custom = ts.PayoffMatrix(
        name="custom", predefined=np.array(([(10, 0), (0, 5)], [(5, 0), (0, 10)]))
    )
    prison = ts.PayoffMatrix(name="prisoners_dilemma")
    assert prison.payoff(choice_agent0=0, choice_agent1=1, agent=0) == 5
    assert prison.payoff(choice_agent0=1, choice_agent1=1, agent=0) == 3
    assert prison.payoff(choice_agent0=0, choice_agent1=0, agent=0) == 1
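# The custom matrix above is constructed but never checked. A minimal hedged
# sketch, assuming the indexing convention matrix[agent, choice_agent0,
# choice_agent1] that the named matrices above are consistent with:
def test_PayoffMatrix_custom():
    custom = ts.PayoffMatrix(
        name="custom", predefined=np.array(([(10, 0), (0, 5)], [(5, 0), (0, 10)]))
    )
    # agent 0's payoffs come from the first 2x2 block, agent 1's from the second
    assert custom.payoff(choice_agent0=0, choice_agent1=0, agent=0) == 10
    assert custom.payoff(choice_agent0=1, choice_agent1=1, agent=1) == 10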
""" import sys from functools import partial sys.path.append("..") sys.path.append(".") from wasabi import msg import numpy as np from scipy.optimize import minimize import tomsup as ts # generating some sample data group = ts.create_agents(["1-ToM", "2-ToM"]) penny = ts.PayoffMatrix("penny_competitive") results = group.compete(p_matrix=penny, n_rounds=30, env="round_robin", save_history=True) def forced_choice_competition( agent0, agent1, choices_a0, choices_a1, p_matrix, agent_pov, ): "reruns a competition with forced choices"
def test_tutorial():
    random.seed(1995)

    # Initiate the competitive matching pennies game
    penny = ts.PayoffMatrix(name="penny_competitive")

    # Print the payoff matrix
    print(penny)

    # Define the random bias agent, which chooses 1 70 percent of the time, and call the agent "jung"
    jung = ts.RB(bias=0.7)

    # Examine the agent
    print(f"jung is a class of type: {type(jung)}")
    if isinstance(jung, ts.Agent):
        print("but jung is also an instance of the parent class ts.Agent")

    # Let us have jung make a choice
    choice = jung.compete()
    print(f"jung chose {choice} and his probability for choosing 1 was {jung.get_bias()}.")

    # Create a reinforcement learning agent
    skinner = ts.create_agents(agents="QL", start_params={"save_history": True})

    # Have the agents compete for 4 rounds
    results = ts.compete(jung, skinner, p_matrix=penny, n_rounds=4)

    # Examine results
    print(results.head())  # inspect the first 5 rows of the dataframe

    # Create a simple 1-ToM with default parameters
    tom_1 = ts.TOM(level=1, dilution=None, save_history=True)

    # Extract the parameters
    tom_1.print_parameters()

    tom_2 = ts.TOM(
        level=2,
        volatility=-2,
        b_temp=-2,  # more deterministic
        bias=0,
        dilution=None,
        save_history=True,
    )

    choice = tom_2.compete(p_matrix=penny, agent=0, op_choice=None)
    print("tom_2 chose:", choice)

    tom_2.reset()  # reset before start

    prev_choice_1tom = None
    prev_choice_2tom = None
    for trial in range(1, 4):
        # Note that op_choice is the choice on the previous turn
        # and that agent is the agent you respond to in the payoff matrix
        choice_1 = tom_1.compete(p_matrix=penny, agent=0, op_choice=prev_choice_1tom)
        choice_2 = tom_2.compete(p_matrix=penny, agent=1, op_choice=prev_choice_2tom)

        # Update previous choice
        prev_choice_1tom = choice_1
        prev_choice_2tom = choice_2

        print(
            f"Round {trial}",
            f" 1-ToM chose {choice_1}",
            f" 2-ToM chose {choice_2}",
            sep="\n",
        )

    tom_2.print_internal(
        keys=["p_k", "p_op"],  # print these two states
        level=[0, 1],  # for the agent's simulated opponents, 0-ToM and 1-ToM
    )

    # Create a list of agents
    agents = ["RB", "QL", "WSLS", "1-TOM", "2-TOM"]
    # And set their starting parameters. An empty dictionary denotes default values
    start_params = [{"bias": 0.7}, {"learning_rate": 0.5}, {}, {}, {}]

    group = ts.create_agents(agents, start_params)  # create a group of agents

    # Specify the environment
    # round_robin: each agent will play against all other agents
    group.set_env(env="round_robin")

    # Finally, we make the group compete 2 simulations of 4 rounds
    results = group.compete(p_matrix=penny, n_rounds=4, n_sim=2, save_history=True)

    res = group.get_results()
    print(res.head(1))  # print the first row
    # res.to_json("tutorials/paper.ndjson", orient="records", lines=True)

    import matplotlib.pyplot as plt

    # Set figure size
    plt.rcParams["figure.figsize"] = [10, 10]

    # Plot a heatmap of the rewards for all agents in the tournament
    group.plot_heatmap(cmap="RdBu", show=False)

    # Plot the choices of the RB agent when competing against the Q-learning agent
    group.plot_choice(
        agent0="RB", agent1="QL", agent=0, plot_individual_sim=False, show=False
    )

    # Plot the score of the RB agent when competing against the Q-learning agent
    group.plot_score(agent0="RB", agent1="QL", agent=0, show=False)

    # Plot the 2-ToM agent's estimate of its opponent's sophistication level
    group.plot_p_k(agent0="1-TOM", agent1="2-TOM", agent=1, level=0, show=False)
    group.plot_p_k(agent0="1-TOM", agent1="2-TOM", agent=1, level=1, show=False)

    # Plot the 2-ToM agent's estimate of its opponent's volatility while believing the opponent to be level 1.
    group.plot_tom_op_estimate(
        agent0="1-TOM",
        agent1="2-TOM",
        agent=1,
        estimate="volatility",
        level=1,
        plot="mean",
        show=False,
    )

    # Plot the 2-ToM agent's estimate of its opponent's bias while believing the opponent to be level 1.
    group.plot_tom_op_estimate(
        agent0="1-TOM",
        agent1="2-TOM",
        agent=1,
        estimate="bias",
        level=1,
        plot="mean",
        show=False,
    )
import random

import tomsup as ts

# Set seed
random.seed(1995)

# - Simulation settings - #
n_tests = 20
n_sim = 8
n_rounds = 60

# (Short run)
# n_tests = 2
# n_sim = 2
# n_rounds = 10

# Get payoff matrix
penny_comp = ts.PayoffMatrix(name="penny_competitive")

n_jobs = 4

# Create list of agents
agents = ["2-ToM", "RB"]
# Set parameters
start_params = [{}, {}]

# Initialize vector for populating with times
elapsed_times = [None] * n_tests

# pr = cProfile.Profile()
# pr.enable()

for test in range(n_tests):
def test_QL():
    ql = ts.agent.QL()
    p_dilemma = ts.PayoffMatrix(name="prisoners_dilemma")
    assert ql.compete(p_matrix=p_dilemma, agent=0, op_choice=None) in [0, 1]
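# A hedged extension of the check above (not in the original suite): after the
# first round op_choice can be supplied, so the Q-learning agent should keep
# producing valid binary choices while it updates its values.
def test_QL_multiple_rounds():
    ql = ts.agent.QL()
    p_dilemma = ts.PayoffMatrix(name="prisoners_dilemma")
    op_choice = None  # no opponent choice exists before the first round
    for _ in range(5):
        choice = ql.compete(p_matrix=p_dilemma, agent=0, op_choice=op_choice)
        assert choice in [0, 1]
        op_choice = 1  # a fixed opponent that always plays 1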
def test_TFT():
    shelling = ts.agent.TFT(copy_prob=1)
    p_dilemma = ts.PayoffMatrix(name="prisoners_dilemma")
    assert 1 == shelling.compete(op_choice=1, p_matrix=p_dilemma)
    assert 0 == shelling.compete(op_choice=0, p_matrix=p_dilemma)
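# A hedged counterpart to the test above. It assumes copy_prob is the
# probability of copying the opponent's last move, so with copy_prob=0 the
# agent should deterministically play the opposite of the opponent's last
# choice. If the implementation falls back to anything else at copy_prob=0,
# these asserts would need revisiting.
def test_TFT_never_copies():
    contrarian = ts.agent.TFT(copy_prob=0)
    p_dilemma = ts.PayoffMatrix(name="prisoners_dilemma")
    assert 0 == contrarian.compete(op_choice=1, p_matrix=p_dilemma)
    assert 1 == contrarian.compete(op_choice=0, p_matrix=p_dilemma)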
import random

import numpy as np
import pandas as pd
from scipy.special import expit as inv_logit
from scipy.special import logit

import tomsup as ts

# Set seed for reproducibility
random.seed(2)

# Simulation settings
# n_sim = 2
n_sim = 100
# n_rounds = 2
n_rounds = 100

# Get payoff matrix
penny_comp = ts.PayoffMatrix(name="penny_competitive")

# Create list of agents
all_agents = ["RB", "WSLS", "QL", "0-TOM", "1-TOM", "2-TOM", "3-TOM", "4-TOM", "5-TOM"]

# Write down parameter means
params_means = [0.8, 0.9, 0.9, 0.5, -2, -1, -2, -1, -2, -1, -2, -1, -2, -1, -2, -1]
# And the variances of each mean (in this case all the same)
params_vars = [0.1] * len(params_means)

# Make an empty list for inserting parameter values
parvals = [0] * len(params_means)
def test_tutorial():
    jung = ts.RB(
        bias=0.7, save_history=True
    )  # calling the agent subclass RB - for more on save_history see '3) inspecting Agent and AgentGroup'

    # Let's examine jung
    print(f"jung is a class of type: {type(jung)}")
    if isinstance(jung, ts.Agent):
        print("but jung is also an instance of the parent class ts.Agent")

    # Let us have jung make a choice
    choice = jung.compete()
    print(f"jung chose {choice} and his probability for choosing 1 was {jung.get_bias()}.")

    skinner = ts.create_agents(
        agents="QL", start_params={"save_history": True}
    )  # create a reinforcement learning agent

    penny = ts.PayoffMatrix(
        name="penny_competitive"
    )  # fetch the competitive matching pennies game

    # Print the payoff matrix
    print(penny)

    # Fetch the underlying numpy matrix
    print(penny.get_matrix())

    jung_a = jung.compete()  # a for action
    skinner_a = skinner.compete(
        p_matrix=penny, agent=1, op_choice=None
    )  # note that op_choice can be unspecified (or None) in the first round

    jung_p = penny.payoff(choice_agent0=jung_a, choice_agent1=skinner_a, agent=0)
    skinner_p = penny.payoff(choice_agent0=jung_a, choice_agent1=skinner_a, agent=1)

    print(
        f"jung chose {jung_a} and skinner chose {skinner_a}, which results in a payoff for jung of {jung_p} and skinner of {skinner_p}."
    )  # note that you might get different results simply by chance

    results = ts.compete(
        jung, skinner, p_matrix=penny, n_rounds=4, save_history=True, verbose=True
    )
    print(type(results))

    jung_sum = results["payoff_agent0"].sum()
    skinner_sum = results["payoff_agent1"].sum()
    print(
        f"jung seemed to get a total of {jung_sum} points, while skinner got a total of {skinner_sum}."
    )

    results.head()  # inspect the first 5 rows of the df

    results = ts.compete(
        jung,
        skinner,
        penny,
        n_rounds=4,
        n_sim=2,
        save_history=True,
        return_val="df",
        verbose=False,
    )
    results.head()

    agents = ["RB", "QL", "WSLS"]  # create a list of agents
    start_params = [
        {"bias": 0.7},
        {"learning_rate": 0.5},
        {},
    ]  # create a list of their starting parameters (an empty dictionary {} simply assumes defaults)

    group = ts.create_agents(agents, start_params)  # create a group of agents
    print(group)
    print("\n----\n")  # to space out the outputs

    group.set_env(
        env="round_robin"
    )  # round_robin: each agent will play against all other agents

    # Make them compete
    group.compete(p_matrix=penny, n_rounds=4, n_sim=2, verbose=True)
    results = group.get_results()
    results.head()  # examine the first 5 rows in results

    # What if I want to know the starting parameters?
    print(
        "These are the starting parameters of jung: ", jung.get_start_params()
    )  # note that it also prints out default parameters
    print("These are the starting parameters of skinner: ", skinner.get_start_params())

    # What if I want to know the agents' last choices?
    print("This is jung's last choice: ", jung.get_choice())
    print("This is skinner's last choice: ", skinner.get_choice())

    # What if I want to know the agents' strategies?
    print("jung's strategy is: ", jung.get_strategy())
    print("skinner's strategy is: ", skinner.get_strategy())

    # What is the history of the agents (e.g. their choices and internal states)?
    history = jung.get_history(format="df")
    print(history.head())
    print("\n --- \n")  # for spacing
    history = skinner.get_history(format="df")
    print(history.head(15))  # the first 15 rows

    ts.plot.score(results, agent0="RB", agent1="QL", agent=0, show=False)
    ts.plot.choice(results, agent0="RB", agent1="QL", agent=0, show=False)

    # Create a list of agents
    agents = ["RB", "QL", "WSLS", "1-TOM", "2-TOM"]
    # And set their starting parameters. An empty dict denotes default values
    start_params = [{"bias": 0.7}, {"learning_rate": 0.5}, {}, {}, {}]

    group = ts.create_agents(agents, start_params)  # create a group of agents

    # Specify the environment
    # round_robin: each agent will play against all other agents
    group.set_env(env="round_robin")

    # Finally, we make the group compete 2 simulations of 4 rounds
    group.compete(p_matrix=penny, n_rounds=4, n_sim=2, save_history=True)

    res = group.get_results()
    res.head(1)  # inspect the first row

    import matplotlib.pyplot as plt

    # Set figure size
    plt.rcParams["figure.figsize"] = [10, 10]

    group.plot_heatmap(cmap="RdBu", show=False)
    group.plot_choice(
        agent0="RB", agent1="QL", agent=0, plot_individual_sim=False, show=False
    )
    group.plot_score(agent0="RB", agent1="QL", agent=0, show=False)
    group.plot_p_k(agent0="1-TOM", agent1="2-TOM", agent=1, level=0, show=False)
    group.plot_p_k(agent0="1-TOM", agent1="2-TOM", agent=1, level=1, show=False)

    group.plot_history(
        "1-TOM",
        "2-TOM",
        agent=1,
        state="",
        fun=lambda x: x["internal_states"]["own_states"]["p_op_mean"][0],
        show=False,
    )

    df = group.get_results()
    print(
        df.loc[(df["agent0"] == "1-TOM") & (df["agent1"] == "2-TOM")]["history_agent1"][1][
            "internal_states"
        ]["opponent_states"][1]["own_states"]
    )

    # Volatility
    group.plot_history(
        "1-TOM",
        "2-TOM",
        agent=1,
        state="",
        fun=lambda x: x["internal_states"]["opponent_states"][1]["own_states"][
            "param_mean"
        ][0, 0],
        ylab="Volatility (log-odds)",
        show=False,
    )

    # Behavioral temperature
    group.plot_history(
        "1-TOM",
        "2-TOM",
        agent=1,
        state="",
        fun=lambda x: x["internal_states"]["opponent_states"][1]["own_states"][
            "param_mean"
        ][0, 1],
        ylab="Behavioral Temperature (log-odds)",
        show=False,
    )

    # k-ToM simple example
    tom_1 = ts.TOM(level=1, dilution=None, save_history=True)

    # Extract the parameters
    print(tom_1.get_parameters())

    tom_2 = ts.TOM(
        level=2,
        volatility=-2,
        b_temp=-2,  # more deterministic
        bias=0,
        dilution=None,
        save_history=True,
    )

    choice = tom_2.compete(p_matrix=penny, agent=0, op_choice=None)
    print(choice)

    tom_2.reset()  # reset before start

    prev_choice_1tom = None
    prev_choice_2tom = None
    for trial in range(1, 4):
        # Note that op_choice is the choice on the previous turn
        # and that agent is the agent you respond to in the payoff matrix
        choice_1 = tom_1.compete(p_matrix=penny, agent=0, op_choice=prev_choice_1tom)
        choice_2 = tom_2.compete(p_matrix=penny, agent=1, op_choice=prev_choice_2tom)

        # Update previous choice
        prev_choice_1tom = choice_1
        prev_choice_2tom = choice_2

        print(
            f"Round {trial}",
            f" 1-ToM chose {choice_1}",
            f" 2-ToM chose {choice_2}",
            sep="\n",
        )

    tom_2.print_internal(keys=["p_k", "p_op"], level=[0, 1])
def test_expected_payoff_fun():
    staghunt = ts.PayoffMatrix(name="staghunt")
    assert expected_payoff_fun(1, agent=0, p_matrix=staghunt) == 2
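# A hedged sketch of where the 2 above comes from, assuming
# expected_payoff_fun(p_op, agent, p_matrix) returns the expected payoff
# difference between choosing 1 and choosing 0 when the opponent chooses 1
# with probability p_op:
def test_expected_payoff_fun_derivation():
    staghunt = ts.PayoffMatrix(name="staghunt")
    p_op = 1  # the opponent chooses 1 with certainty
    # using the staghunt payoffs asserted in test_PayoffMatrix: 5 - 3 = 2
    diff = staghunt.payoff(1, 1, 0) - staghunt.payoff(0, 1, 0)
    assert expected_payoff_fun(p_op, agent=0, p_matrix=staghunt) == diff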
def test_tutorial():
    sigmund = ts.WSLS()  # create agent

    # Inspect sigmund
    print(f"sigmund is a class of type: {type(sigmund)}")  # f is for format
    if isinstance(sigmund, ts.Agent):
        print("but sigmund also has the parent class ts.Agent")

    class ReversedWSLS(ts.Agent):  # make sure that the parent class is ts.Agent
        """
        ReversedWSLS: Win-switch, lose-stay.

        This agent is a reversed win-stay, lose-switch agent, which ...
        """  # add a docstring which explains the agent

        pass  # we will later replace this pass with something else

    freud = ReversedWSLS()
    print(f"is freud an Agent? {isinstance(freud, ts.Agent)}")

    class ReversedWSLS(ts.Agent):
        """
        ReversedWSLS: Win-switch, lose-stay.

        This agent is a reversed win-stay, lose-switch agent, which ...
        """

        def __init__(self, first_move, **kwargs):  # initialize the agent
            self.strategy = "ReversedWSLS"  # set the strategy name

            # Set internal parameters
            self.first_move = first_move

            super().__init__(
                **kwargs
            )  # pass additional arguments to the ts.Agent class (could e.g. include 'save_history = True')
            self._start_params = {
                "first_move": first_move,
                **kwargs,
            }  # save any starting parameters used when the agent is reset

    freud = ReversedWSLS(first_move=1)
    print(f"what is freud's first move? {freud.first_move}")
    print(f"what are freud's starting parameters? {freud.get_start_params()}")
    print(f"what is freud's strategy? {freud.get_strategy()}")

    class ReversedWSLS(ts.Agent):
        """
        ReversedWSLS: Win-switch, lose-stay.

        This agent is a reversed win-stay, lose-switch agent, which ...
        """

        def __init__(self, first_move, **kwargs):  # initialize the agent
            self.strategy = "ReversedWSLS"  # set the strategy name

            # Set internal parameters
            self.first_move = first_move

            super().__init__(
                **kwargs
            )  # pass additional arguments to the ts.Agent class (could e.g. include 'save_history = True')
            self._start_params = {
                "first_move": first_move,
                **kwargs,
            }  # save any starting parameters used when the agent is reset

        def compete(self, p_matrix, op_choice=None, agent=0):
            """
            Win-switch, lose-stay strategy, with the first move being set when
            the class is initialized (__init__()).

            p_matrix is a PayoffMatrix
            op_choice is either 1 or 0
            agent is either 0 or 1 and indicates the perspective of the agent
            in the game (whether it is player 1 or 2)
            """
            if self.choice is None:  # if a choice hasn't been made: choose the predefined first move
                self.choice = self.first_move  # fetch from self
            else:  # if a choice has been made:
                payoff = p_matrix.payoff(
                    self.choice, op_choice, agent
                )  # calculate payoff of last round
                if payoff == 1:  # if the agent won, then switch
                    self.choice = 1 - self.choice  # save the choice in self (for next round)
                    # also save any other internal states which you might
                    # want the agent to keep for next round in self

            self._add_to_history(
                choice=self.choice
            )  # save action and (if any) internal states in history
            # note that _add_to_history() is not intended for
            # later use within the agent
            return self.choice  # return choice, which is either 1 or 0

    freud = ReversedWSLS(first_move=1)  # create the agent

    # Fetch the payoff matrix for the penny game
    penny = ts.PayoffMatrix(name="penny_competitive")
    print(
        "This is the payoff matrix for the game (seen from freud's perspective):",
        penny()[0, :, :],
        sep="\n",
    )

    # Have freud compete
    choice = freud.compete(penny)
    print(f"what is freud's choice the first round? {choice}")

    choice = freud.compete(penny, op_choice=1)
    print(f"what is freud's choice the second round if his opponent chose 1? {choice}")
{choice}") class ReversedWSLS(ts.Agent): """ ReversedWSLS: Win-switch, lose-stay. This agent is a reversed win-stay, lose-switch agent, which ... Examples: >>> waade = ReversedWSLS(first_move = 1) >>> waade.compete(op_choice = None, p_matrix = penny) 1 """ def __init__(self, first_move, **kwargs): self.strategy = "ReversedWSLS" # set internal parameters self.first_move = first_move super().__init__( **kwargs ) # pass additional argument the ts.Agent class (could e.g. include 'save_history = True') self._start_params = { "first_move": first_move, **kwargs, } # save any starting parameters used when the agent is reset def compete(self, p_matrix, op_choice=None): if ( self.choice is None ): # if a choice haven't been made: Choose the redifined first move self.choice = self.first_move # fetch from self else: # if a choice have been made: payoff = p_matrix.payoff( self.choice, op_choice, 0 ) # calculate payoff of last round if payoff == 1: # if the agent won then switch self.choice = ( 1 - self.choice ) # save the choice in self (for next round) # also save any other internal states which you might # want the agent to keep for next round in self self._add_to_history( choice=self.choice ) # save action and (if any) internal states in history # note that _add_to_history() is not intented for # later use within the agent return self.choice # return choice # define any additional function you wish the class should have def get_first_move(self): return self.first_move