def func_to_minimize(
    k: int,  # optimizable variable
    volatility: float,  # optimizable variable
    b_temp: float,  # optimizable variable
    bias: float,  # optimizable variable
    dilution: float,  # optimizable variable
    choices_agent=results.choice_agent0,  # known variables
    choices_opponent=results.choice_agent1,  # known variables
    opponent=ts.create_agents("2-ToM"),  # known variables
    agent_pov=0,  # known variables
):
    agent0 = ts.create_agents(
        f"{k}-ToM", volatility=volatility, b_temp=b_temp, bias=bias, dilution=dilution
    )
    p_choices = list(
        forced_choice_competition(
            agent0=agent0,
            agent1=opponent,
            choices_a0=choices_agent,
            choices_a1=choices_opponent,
            p_matrix=penny,
            agent_pov=agent_pov,
        )
    )
    # Euclidean distance between the actual choices and the simulated agent's
    # choice probabilities
    return np.linalg.norm(np.array(choices_agent) - np.array(p_choices))
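# --- Illustrative sketch, not the original script's fitting code ---
# One way to run the optimization: fix the integer sophistication level k with
# functools.partial and let scipy.optimize.minimize search over the continuous
# parameters. The starting values and the choice of Nelder-Mead below are
# assumptions for illustration, not the authors' settings.
objective = partial(func_to_minimize, 1)  # assume k = 1 for this sketch
x0 = np.array([-2.0, -1.0, 0.0, 0.1])  # volatility, b_temp, bias, dilution (illustrative)
fit = minimize(lambda x: objective(*x), x0, method="Nelder-Mead")
msg.info(f"Estimated parameters (k = 1): {fit.x}")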
def test_tutorial():
    random.seed(1995)

    # initiate the competitive matching pennies game
    penny = ts.PayoffMatrix(name="penny_competitive")

    # print the payoff matrix
    print(penny)

    # define the random bias agent, which chooses 1 70 percent of the time, and call the agent "jung"
    jung = ts.RB(bias=0.7)

    # Examine the agent
    print(f"jung is a class of type: {type(jung)}")
    if isinstance(jung, ts.Agent):
        print("but jung is also an instance of the parent class ts.Agent")

    # let us have Jung make a choice
    choice = jung.compete()
    print(
        f"jung chose {choice} and his probability for choosing 1 was {jung.get_bias()}."
    )

    # create a reinforcement learning agent
    skinner = ts.create_agents(agents="QL", start_params={"save_history": True})

    # have the agents compete for 4 rounds
    results = ts.compete(jung, skinner, p_matrix=penny, n_rounds=4)

    # examine results
    print(results.head())  # inspect the first 5 rows of the dataframe

    # Creating a simple 1-ToM with default parameters
    tom_1 = ts.TOM(level=1, dilution=None, save_history=True)

    # Extract the parameters
    tom_1.print_parameters()

    tom_2 = ts.TOM(
        level=2,
        volatility=-2,
        b_temp=-2,  # more deterministic
        bias=0,
        dilution=None,
        save_history=True,
    )

    choice = tom_2.compete(p_matrix=penny, agent=0, op_choice=None)
    print("tom_2 chose:", choice)

    tom_2.reset()  # reset before start

    prev_choice_1tom = None
    prev_choice_2tom = None
    for trial in range(1, 4):
        # note that op_choice is the choice on the previous turn
        # and that agent is the agent you respond to in the payoff matrix
        choice_1 = tom_1.compete(p_matrix=penny, agent=0, op_choice=prev_choice_1tom)
        choice_2 = tom_2.compete(p_matrix=penny, agent=1, op_choice=prev_choice_2tom)

        # update previous choice
        prev_choice_1tom = choice_1
        prev_choice_2tom = choice_2

        print(
            f"Round {trial}",
            f"  1-ToM chose {choice_1}",
            f"  2-ToM chose {choice_2}",
            sep="\n",
        )

    tom_2.print_internal(
        keys=["p_k", "p_op"],  # print these two states
        level=[0, 1],  # for the agent's simulated opponents 0-ToM and 1-ToM
    )

    # Create a list of agents
    agents = ["RB", "QL", "WSLS", "1-TOM", "2-TOM"]
    # And set their starting parameters. An empty dictionary denotes default values
    start_params = [{"bias": 0.7}, {"learning_rate": 0.5}, {}, {}, {}]

    group = ts.create_agents(agents, start_params)  # create a group of agents

    # Specify the environment
    # round_robin e.g. each agent will play against all other agents
    group.set_env(env="round_robin")

    # Finally, we make the group compete in 2 simulations of 4 rounds
    results = group.compete(p_matrix=penny, n_rounds=4, n_sim=2, save_history=True)

    res = group.get_results()
    print(res.head(1))  # print the first row
    res.head(1)
    # res.to_json("tutorials/paper.ndjson", orient="records", lines=True)

    import matplotlib.pyplot as plt

    # Set figure size
    plt.rcParams["figure.figsize"] = [10, 10]

    # plot a heatmap of the rewards for all agents in the tournament
    group.plot_heatmap(cmap="RdBu", show=False)

    # plot the choices of the RB agent when competing against the Q-learning agent
    group.plot_choice(
        agent0="RB", agent1="QL", agent=0, plot_individual_sim=False, show=False
    )

    # plot the score of the RB agent when competing against the Q-learning agent
    group.plot_score(agent0="RB", agent1="QL", agent=0, show=False)

    # plot 2-ToM's estimate of its opponent's sophistication level
    group.plot_p_k(agent0="1-TOM", agent1="2-TOM", agent=1, level=0, show=False)
    group.plot_p_k(agent0="1-TOM", agent1="2-TOM", agent=1, level=1, show=False)

    # plot 2-ToM's estimate of its opponent's volatility while believing the opponent to be level 1.
    group.plot_tom_op_estimate(
        agent0="1-TOM",
        agent1="2-TOM",
        agent=1,
        estimate="volatility",
        level=1,
        plot="mean",
        show=False,
    )

    # plot 2-ToM's estimate of its opponent's bias while believing the opponent to be level 1.
    group.plot_tom_op_estimate(
        agent0="1-TOM",
        agent1="2-TOM",
        agent=1,
        estimate="bias",
        level=1,
        plot="mean",
        show=False,
    )
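    # Not part of the original tutorial: since the calls above use show=False,
    # every figure stays open. When running many tests, one could close them
    # here with matplotlib's standard pyplot API to free memory.
    plt.close("all")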
"""
This script fits the k-ToM model to data using scipy.optimize()
"""
import sys
from functools import partial

sys.path.append("..")
sys.path.append(".")

from wasabi import msg
import numpy as np
from scipy.optimize import minimize

import tomsup as ts

# generating some sample data
group = ts.create_agents(["1-ToM", "2-ToM"])
penny = ts.PayoffMatrix("penny_competitive")
results = group.compete(
    p_matrix=penny, n_rounds=30, env="round_robin", save_history=True
)


def forced_choice_competition(
    agent0,
    agent1,
    choices_a0,
    choices_a1,
    p_matrix,
    agent_pov,
):
# Initialize vector for populating with times
elapsed_times = [None] * n_tests

# pr = cProfile.Profile()
# pr.enable()

for test in range(n_tests):
    # print(test)

    # Get start time
    start_time = time()

    # Make group
    group = ts.create_agents(agents, start_params)

    # Set as round robin tournament
    group.set_env(env="round_robin")

    # Run tournament
    results = group.compete(
        p_matrix=penny_comp,
        n_rounds=n_rounds,
        n_sim=n_sim,
        save_history=False,
        verbose=False,
        n_jobs=n_jobs,
    )

    # Save elapsed time in vector
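    # Assumed completion (the original excerpt stops at the comment above):
    # store how long this test took, in seconds.
    elapsed_times[test] = time() - start_time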
# Make a group competition
agents = ['RB', '0-TOM', '2-TOM']  # Select agents
params = [
    {'bias': 0.7},  # RB's parameters
    {},  # 0-ToM's parameters (empty means default)
    {'volatility': -3, 'b_temp': 0, 'dilution': 0.1},  # 2-ToM's parameters
]
group = ts.create_agents(agents, params)  # Create the group
type(group)

group.set_env(env='round_robin')  # Set the tournament structure. Right now there's only one option
print(group)  # check the group settings

# Make the group compete
results = group.compete(p_matrix=penny, n_rounds=40, n_sim=4)
results.head()  # examine the first 5 rows in results

# Plot score and choices for some competing agents (WORK IN PROGRESS)
ts.plot.score(results, agent0="RB", agent1="0-TOM", agent=0)
ts.plot.choice(results, agent0="RB", agent1="2-TOM", agent=0)

# It's also possible for people to play against agents from tomsup.
# We have made a basic PsychoPy script for this.
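# --- Illustrative sketch, not the PsychoPy script shipped with tomsup ---
# A minimal command-line loop where a human plays matching pennies against a
# tomsup agent. It only uses API calls shown elsewhere in this document
# (create_agents, compete, PayoffMatrix.payoff); the variable names are ours.
opponent = ts.create_agents('1-TOM')
human_prev = None
for _ in range(5):
    # the agent commits to a choice first, conditioning on the human's previous choice
    agent_choice = opponent.compete(p_matrix=penny, agent=1, op_choice=human_prev)
    human_choice = int(input("Pick a hand (0 or 1): "))
    payoff = penny.payoff(choice_agent0=human_choice, choice_agent1=agent_choice, agent=0)
    print(f"You {'won' if payoff > 0 else 'lost'} this round (payoff: {payoff}).")
    human_prev = human_choice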
    for v in vol:
        for b in bias:
            for beta in b_temp:
                agents.append(f"{k}-tom")
                args.append({"bias": b, "b_temp": beta, "volatility": v})
    return agents, args


agents, args = make_grid(k, vol_r, bias_r, b_temp_r)

a = "3-tom"
agents.append(a)
args.append({})

# generating some sample data
group = ts.create_agents(agents, args)
penny = ts.PayoffMatrix("penny_competitive")
group.set_env("round_robin")

# remove pairs that do not include the 3-ToM
group.pairing = [pair for pair in group.pairing if a in pair[0] or a in pair[1]]

results = group.compete(p_matrix=penny, n_rounds=n_rounds, save_history=True, n_jobs=-1)

### Extract the 3-ToM's recovered parameters
assert results.agent1.unique()[0] == "3-tom"
popup.addField("Number of trials", 2)
popup.show()

if popup.OK:
    ID = popup.data[0]
    age = popup.data[1]
    gender = popup.data[2]
    k = popup.data[3]
    n_trials = popup.data[4]
elif popup.Cancel:
    core.quit()

print(f"this is {k}")

# ------------- create agent and payoff matrix ---------

tom = ts.create_agents(agents="RB")  # this needs to be changed
penny = ts.PayoffMatrix(name="penny_competitive")

# ------------- Defining Variables and function ---------

intro0 = f"""Dear participant

In the following experiment you will compete against another person in the
matching pennies game for {n_trials} trials.

Before starting the experiment, we would like to inform you that no personal
information is collected, and that besides the given information no personal
information will be recorded.

If at any time you should feel uncomfortable, you are free to stop the
experiment and ask for any generated data to be deleted.

If you have read the above and agree to proceed, press ENTER."""

intro1 = f"""We will now briefly explain the rules of the game.

You will see two closed hands. Your opponent will have hidden a penny in
either one of them. Your goal is to figure out which of the two hands
contains the penny.

If you guess right, you get a point; if not, your opponent gains a point.

If you have read the above and understand the rules, press ENTER."""
def test_tutorial():
    jung = ts.RB(
        bias=0.7, save_history=True
    )  # calling the agent subclass RB - for more on save_history see '3) inspecting Agent and AgentGroup'

    # Let's examine jung
    print(f"jung is a class of type: {type(jung)}")
    if isinstance(jung, ts.Agent):
        print("but jung is also an instance of the parent class ts.Agent")

    # let us have Jung make a choice
    choice = jung.compete()
    print(
        f"jung chose {choice} and his probability for choosing 1 was {jung.get_bias()}."
    )

    skinner = ts.create_agents(
        agents="QL", start_params={"save_history": True}
    )  # create a reinforcement learning agent

    penny = ts.PayoffMatrix(
        name="penny_competitive"
    )  # fetch the competitive matching pennies game

    # print the payoff matrix
    print(penny)

    # fetch the underlying numpy matrix
    print(penny.get_matrix())

    jung_a = jung.compete()  # a for action
    skinner_a = skinner.compete(
        p_matrix=penny, agent=1, op_choice=None
    )  # Note that op_choice can be unspecified (or None) in the first round

    jung_p = penny.payoff(choice_agent0=jung_a, choice_agent1=skinner_a, agent=0)
    skinner_p = penny.payoff(choice_agent0=jung_a, choice_agent1=skinner_a, agent=1)

    print(
        f"jung chose {jung_a} and skinner chose {skinner_a}, which results in a payoff for jung of {jung_p} and skinner of {skinner_p}."
    )
    # Note that you might get different results simply by chance

    results = ts.compete(
        jung, skinner, p_matrix=penny, n_rounds=4, save_history=True, verbose=True
    )
    print(type(results))

    jung_sum = results["payoff_agent0"].sum()
    skinner_sum = results["payoff_agent1"].sum()
    print(
        f"jung seemed to get a total of {jung_sum} points, while skinner got a total of {skinner_sum}."
    )

    results.head()  # inspect the first 5 rows of the df

    results = ts.compete(
        jung,
        skinner,
        penny,
        n_rounds=4,
        n_sim=2,
        save_history=True,
        return_val="df",
        verbose=False,
    )
    results.head()

    agents = ["RB", "QL", "WSLS"]  # create a list of agents
    start_params = [
        {"bias": 0.7},
        {"learning_rate": 0.5},
        {},
    ]  # create a list of their starting parameters (an empty dictionary {} simply assumes defaults)

    group = ts.create_agents(agents, start_params)  # create a group of agents
    print(group)

    print("\n----\n")  # to space out the outputs

    group.set_env(
        env="round_robin"
    )  # round_robin e.g. each agent will play against all other agents

    # make them compete
    group.compete(p_matrix=penny, n_rounds=4, n_sim=2, verbose=True)
    results = group.get_results()
    results.head()  # examine the first 5 rows in results

    # What if I want to know the starting parameters?
    print(
        "These are the starting parameters of jung: ", jung.get_start_params()
    )  # Note that it also prints out default parameters
    print("These are the starting parameters of skinner: ", skinner.get_start_params())

    # What if I want to know the agent's last choice?
    print("This is jung's last choice: ", jung.get_choice())
    print("This is skinner's last choice: ", skinner.get_choice())

    # What if I want to know the agents' strategies?
    print("jung's strategy is: ", jung.get_strategy())
    print("skinner's strategy is: ", skinner.get_strategy())

    # What is the history of skinner (e.g. what are his choices and internal states)?
    history = jung.get_history(format="df")
    print(history.head())
    print("\n --- \n")  # for spacing
    history = skinner.get_history(format="df")
    print(history.head(15))  # the first 15 rows

    ts.plot.score(results, agent0="RB", agent1="QL", agent=0, show=False)
    ts.plot.choice(results, agent0="RB", agent1="QL", agent=0, show=False)

    # Create a list of agents
    agents = ["RB", "QL", "WSLS", "1-TOM", "2-TOM"]
    # And set their starting parameters.
    # An empty dict denotes default values
    start_params = [{"bias": 0.7}, {"learning_rate": 0.5}, {}, {}, {}]

    group = ts.create_agents(agents, start_params)  # create a group of agents

    # Specify the environment
    # round_robin e.g. each agent will play against all other agents
    group.set_env(env="round_robin")

    # Finally, we make the group compete in 2 simulations of 4 rounds
    group.compete(p_matrix=penny, n_rounds=4, n_sim=2, save_history=True)

    res = group.get_results()
    res.head(1)  # print the first row

    import matplotlib.pyplot as plt

    # Set figure size
    plt.rcParams["figure.figsize"] = [10, 10]

    group.plot_heatmap(cmap="RdBu", show=False)
    group.plot_choice(
        agent0="RB", agent1="QL", agent=0, plot_individual_sim=False, show=False
    )
    group.plot_score(agent0="RB", agent1="QL", agent=0, show=False)
    group.plot_p_k(agent0="1-TOM", agent1="2-TOM", agent=1, level=0, show=False)
    group.plot_p_k(agent0="1-TOM", agent1="2-TOM", agent=1, level=1, show=False)
    group.plot_history(
        "1-TOM",
        "2-TOM",
        agent=1,
        state="",
        fun=lambda x: x["internal_states"]["own_states"]["p_op_mean"][0],
        show=False,
    )

    df = group.get_results()
    print(
        df.loc[(df["agent0"] == "1-TOM") & (df["agent1"] == "2-TOM")]["history_agent1"][
            1
        ]["internal_states"]["opponent_states"][1]["own_states"]
    )

    # volatility
    group.plot_history(
        "1-TOM",
        "2-TOM",
        agent=1,
        state="",
        fun=lambda x: x["internal_states"]["opponent_states"][1]["own_states"][
            "param_mean"
        ][0, 0],
        ylab="Volatility (log-odds)",
        show=False,
    )

    # behavioural temperature
    group.plot_history(
        "1-TOM",
        "2-TOM",
        agent=1,
        state="",
        fun=lambda x: x["internal_states"]["opponent_states"][1]["own_states"][
            "param_mean"
        ][0, 1],
        ylab="Behavioral Temperature (log-odds)",
        show=False,
    )

    # k-ToM simple example
    tom_1 = ts.TOM(level=1, dilution=None, save_history=True)

    # Extract the parameters
    print(tom_1.get_parameters())

    tom_2 = ts.TOM(
        level=2,
        volatility=-2,
        b_temp=-2,  # more deterministic
        bias=0,
        dilution=None,
        save_history=True,
    )

    choice = tom_2.compete(p_matrix=penny, agent=0, op_choice=None)
    print(choice)

    tom_2.reset()  # reset before start

    prev_choice_1tom = None
    prev_choice_2tom = None
    for trial in range(1, 4):
        # note that op_choice is the choice on the previous turn
        # and that agent is the agent you respond to in the payoff matrix
        choice_1 = tom_1.compete(p_matrix=penny, agent=0, op_choice=prev_choice_1tom)
        choice_2 = tom_2.compete(p_matrix=penny, agent=1, op_choice=prev_choice_2tom)

        # update previous choice
        prev_choice_1tom = choice_1
        prev_choice_2tom = choice_2

        print(
            f"Round {trial}",
            f"  1-ToM chose {choice_1}",
            f"  2-ToM chose {choice_2}",
            sep="\n",
        )

    tom_2.print_internal(keys=["p_k", "p_op"], level=[0, 1])