def reset(self, nA, a0, mu, firm0, firm1, gamma):
    self.b = 0  # begins in a cooperative phase
    self.W = 0  # payoff balance initiates at 0
    self.c_act = colab_actions[str((firm0, firm1))][0]
    self.c_act1 = colab_actions[str((firm0, firm1))][1]
    self.d_act = nash_actions[str((firm0, firm1))][0]
    self.d_act1 = nash_actions[str((firm0, firm1))][1]
    self.a0 = a0
    self.mu = mu
    self.firm0 = firm0
    self.firm1 = firm1
    self.nA = nA
    self.total_pg = []
    act_colab = np.array([self.c_act, self.c_act1])
    cheat_act = np.array([self.c_act, self.d_act1])
    self.profit1_cc = profit(act_colab, self.a0, self.mu, self.firm0,
                             self.firm1, self.nA)[1]
    # Infinite sum of discounted cooperative profits starting next period
    self.Q1_cc = gamma * self.profit1_cc / (1 - gamma)
    self.profit1_cd = profit(cheat_act, self.a0, self.mu, self.firm0,
                             self.firm1, self.nA)[1]
    # Threshold for how much profit the rival may gain before amTFT punishes
    self.T = 0.1 * (self.profit1_cd - self.profit1_cc)
    self.punishlen = punishlen  # punishment length (module-level constant, not a reset argument)
    return
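# The closed form used for Q1_cc above is the geometric sum of discounted
# cooperative profits starting next period. A minimal sketch checking that the
# closed form matches a brute-force truncation of the series; the gamma and
# profit values below are made-up illustration numbers, not repo parameters.
gamma = 0.95          # example discount factor (assumed)
profit1_cc = 0.25     # example per-period cooperative profit (assumed)

# sum_{t=1}^{inf} gamma^t * profit = gamma * profit / (1 - gamma)
closed_form = gamma * profit1_cc / (1 - gamma)
truncated = sum(gamma ** t * profit1_cc for t in range(1, 2000))

print(closed_form, truncated)  # the two values agree to several decimals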
def reset(self, firm0, firm1, eparams):
    action0 = self.np_random.uniform(low=0, high=nA)
    action1 = self.np_random.uniform(low=0, high=nA)
    # self.vert0 = firm0['quality']
    # self.vert1 = firm1['quality']
    self.firm0 = firm0
    self.firm1 = firm1
    self.nash_act = eparams['nash_actions'][str((self.firm0, self.firm1))]
    self.baseline = profit(self.nash_act, A0, MU, self.firm0, self.firm1, nA)
    self.randomness = eparams['randomness'] * self.baseline
    self.state = np.array([action0, action1])
    return np.array(self.state)
def step(self, action0, action1):
    # Action made by the "meta agent", i.e. all market participants' joint action
    action = np.array([action0, action1])
    # assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
    # TODO: make this work (threw AssertionError for valid actions)

    # Randomness enters through the demand intercept a0
    a0 = A0 + np.random.normal(0, 1) * self.randomness
    reward = profit(action, a0=a0, mu=MU, firm0=self.firm0, firm1=self.firm1, nA=nA)
    # Subtract the Nash baseline profit from the reward
    reward = reward - self.baseline

    # New state
    self.state = np.array([action[0], action[1]])

    # The episode ends with probability p_end each period
    done = np.random.random() > (1 - p_end)
    return self.state, reward, done, {}
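# For context, a minimal rollout sketch of this reset/step interface. It is a
# hypothetical usage example, not a repo script: ContBertrand, the firm dicts,
# eparams, and nA are assumed to be constructed as elsewhere in this codebase,
# and the random actions stand in for a trained agent's choices.
import numpy as np

firm0 = {'cost': 1, 'quality': 2}   # assumed firm specification
firm1 = {'cost': 1, 'quality': 2}

env = ContBertrand(firm0, firm1, eparams)   # eparams assumed defined as in testing()
state = env.reset(firm0, firm1, eparams)

done = False
total_reward = np.zeros(2)
while not done:
    # Random joint action; a trained Agent would pick these from the state
    action0 = np.random.randint(0, nA)
    action1 = np.random.randint(0, nA)
    state, reward, done, _ = env.step(action0, action1)
    total_reward += reward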
def reset(self, firm0, firm1):
    min_profit = -0.1
    max_profit = 0.1
    profit0 = self.np_random.uniform(low=min_profit, high=max_profit)
    action0 = self.np_random.uniform(low=0, high=nA)
    profit1 = self.np_random.uniform(low=min_profit, high=max_profit)
    action1 = self.np_random.uniform(low=0, high=nA)
    self.vert0 = firm0['quality']
    self.vert1 = firm1['quality']
    self.firm0 = firm0
    self.firm1 = firm1
    self.nash_act = eparams['nash_actions'][str((self.firm0, self.firm1))]
    self.baseline = profit(self.nash_act, A0, MU, self.firm0, self.firm1, nA)
    self.state = np.array(
        [profit0, action0, profit1, action1, self.vert0, self.vert1])
    return np.array(self.state)
def act(self, act1):
    meta_act = np.array([self.c_act, act1])
    r1 = profit(meta_act, self.a0, self.mu, self.firm0, self.firm1, self.nA)[1]
    # Rival's one-period gain over the cooperative payoff (the Q1_cc terms cancel)
    deviation = r1 + self.Q1_cc - self.profit1_cc - self.Q1_cc
    deviation = max(deviation, 0)  # no negative credits will be rewarded
    if self.b == 0:
        # Cooperative phase: play the collusive action and accumulate the debit
        act = self.c_act
        self.W = self.W + deviation
    else:
        # Punishment phase: play the Nash action and count down
        act = self.d_act
        self.b = self.b - 1
    if self.W > self.T:
        # Accumulated debit exceeds the threshold: start a punishment phase
        self.b = self.punishlen
        self.W = 0
    return act
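# To make the phase switching concrete, here is a minimal self-contained sketch
# of the same debit-and-punish rule implemented in act(), using made-up scalar
# payoffs instead of the repo's profit function. It is an illustration only,
# not the Agent class.
coop_payoff = 0.25    # rival's payoff when both cooperate (assumed)
cheat_payoff = 0.40   # rival's payoff when it defects against cooperation (assumed)
T = 0.1 * (cheat_payoff - coop_payoff)   # same threshold rule as in reset()
punishlen = 3                            # assumed punishment length

W, b = 0.0, 0
rival_actions = ['coop', 'coop', 'cheat', 'coop', 'coop', 'coop', 'coop']

for rival in rival_actions:
    r1 = cheat_payoff if rival == 'cheat' else coop_payoff
    deviation = max(r1 - coop_payoff, 0)   # per-period debit, as in act()
    if b == 0:
        my_act, W = 'coop', W + deviation
    else:
        my_act, b = 'punish', b - 1
    if W > T:
        b, W = punishlen, 0
    print(rival, my_act, round(W, 3), b)
# After the single cheat the debit exceeds T, so the agent punishes for 3 periods
# and then returns to cooperation.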
import itertools

import numpy as np
import pandas as pd

from calc_nash_monopoly import profit, nash_action, monopoly_action

econ_params = ECONPARAMS['base_case']  # ECONPARAMS assumed defined/imported earlier in this file

a0 = econ_params['a0']
mu = econ_params['mu']
nA = 7
firm0 = {'cost': 1, 'quality': 2}
firm1 = {'cost': 1, 'quality': 2}

acts = itertools.product(np.arange(nA), np.arange(nA))
profit_df = np.zeros((nA * nA, 5))
ix = 0

nash_act = nash_action(nA, a0, mu, firm0, firm1)
monopoly_act = monopoly_action(nA, a0, mu, firm0, firm1)
nash_profit = profit(nash_act, a0, mu, firm0, firm1, nA)[0]
monopoly_profit = profit(monopoly_act, a0, mu, firm0, firm1, nA)[0]

# Evaluate firm 0's profit on the full nA x nA grid of joint actions
for act in acts:
    action = np.array(act)
    profit_0 = profit(action, a0, mu, firm0, firm1, nA)[0]
    profit_df[ix][0] = action[0]
    profit_df[ix][1] = action[1]
    profit_df[ix][2] = profit_0
    profit_df[ix][3] = nash_profit
    profit_df[ix][4] = monopoly_profit
    ix = ix + 1

df = pd.DataFrame(profit_df)
df.to_csv("actact_profit.csv")
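# A possible follow-up step (an assumption, not part of the repo): read the grid
# written above and compute a normalised profit gain, 0 at the static Nash
# profit and 1 at the monopoly profit. The column names are chosen here to
# mirror the profit_df layout; the normalisation is one common convention.
import pandas as pd

df = pd.read_csv("actact_profit.csv", index_col=0)
df.columns = ['action0', 'action1', 'profit0', 'nash_profit', 'monopoly_profit']

df['profit_gain'] = (df['profit0'] - df['nash_profit']) / (
    df['monopoly_profit'] - df['nash_profit'])

# Joint actions ranked by how close firm 0's profit gets to the monopoly level
print(df.sort_values('profit_gain', ascending=False).head())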
def testing(firmlist, agent, params, eparams, saveto):
    '''
    testing tests a given agent and saves a dataframe to a file

    An instance of class Agent is tested in an economic environment determined
    by the firmlist, params, and eparams. Testing occurs by exposing the agent
    to all combinations of possible firms present in the firmlist argument,
    and results are measured by recording the profit as well as the reference
    values needed to evaluate how good that profit is. The recorded values are
    appended to a dictionary that is then turned into a pandas data frame and
    exported to the .csv file specified by the argument saveto.

    INPUT:
    firmlist...a list of combinations of firms
    agent......object of class Agent that has an already trained network
    params.....dict of hyperparameters relating to code mechanics
    eparams....dict of hyperparameters relating to the economic env
    saveto.....string specifying where the dataframe should be saved
    OUTPUT:
    The function does not return anything, but it saves a pandas data frame
    to the file 'saveto'. This file can be read by R and used to produce
    graphical output.
    '''
    nA = params['nA']
    A0 = eparams['a0']
    MU = eparams['mu']
    # firmlist holds pairs of firms, so index into the first pair
    firm0 = firmlist[0][0]
    firm1 = firmlist[0][1]
    env = ContBertrand(firm0, firm1)
    agent.net.eval()
    df = []

    # Make econ variables
    dict_key = str((firm0, firm1))
    nash_action = eparams['nash_actions'][dict_key]
    monopoly_action = eparams['monopoly_actions'][dict_key]
    nash_profit = profit(nash_action, A0, MU, firm0, firm1, nA)
    monopoly_profit = profit(monopoly_action, A0, MU, firm0, firm1, nA)

    # Initiate new env and amTFT agent
    s_next = env.reset(firm0, firm1)
    done = False
    frame_idx = 0
    firm_ix = 0
    for t in range(1, (len(firmlist) + 1) * 1000):
        if done:
            # Episode over: move on to the next pair of firms
            # TODO: expand set of firms
            firm0 = firmlist[firm_ix][0]
            firm1 = firmlist[firm_ix][1]
            firm_ix += 1
            # Make econ variables
            dict_key = str((firm0, firm1))
            nash_action = eparams['nash_actions'][dict_key]
            monopoly_action = eparams['monopoly_actions'][dict_key]
            nash_profit = profit(nash_action, A0, MU, firm0, firm1, nA)
            monopoly_profit = profit(monopoly_action, A0, MU, firm0, firm1, nA)
            # Initiate new env and amTFT agent
            s_next = env.reset(firm0, firm1)
            done = False
        frame_idx += 1
        epsilon = 0
        s = s_next
        action0 = agent.act(s[np.array([0, 1, 4, 5])], epsilon)
        action1 = agent.act(s[np.array([2, 3, 4, 5])], epsilon)
        s_next, reward_n, done, _ = env.step(action0, action1)
        done = False  # Overwrite whatever may have come out before
        if frame_idx % 1000 == 0:
            done = True
        if reward_n is not None:
            reward = reward_n[0]
            pg = reward
            # pg = profit_gain(reward, nash_profit, colab_profit)[0]  # important to index here
            agent.total_pg.append(pg)
            df.append({'vertdiff0': firm0['quality'],
                       'vertdiff1': firm1['quality'],
                       'reward0': reward_n[0],
                       'reward1': reward_n[1],
                       'nash0': nash_profit[0],
                       'nash1': nash_profit[1],
                       'mon0': monopoly_profit[0],
                       'mon1': monopoly_profit[1]})
    df = pd.DataFrame(df)
    df.to_csv(saveto)
    return
def testing(firmlist, agent, params, eparams, saveto, cheat):
    '''
    testing tests a given agent and saves a dataframe to a file

    An instance of class Agent is tested in an economic environment determined
    by the firmlist, params, and eparams. Testing occurs by exposing the agent
    to all combinations of possible firms present in the firmlist argument,
    and results are measured by recording the profit as well as the reference
    values needed to evaluate how good that profit is. The recorded values are
    appended to a dictionary that is then turned into a pandas data frame and
    exported to the .csv file specified by the argument saveto.

    INPUT:
    firmlist...a list of combinations of firms
    agent......object of class Agent that has an already trained network
    params.....dict of hyperparameters relating to code mechanics
    eparams....dict of hyperparameters relating to the economic env
    saveto.....string specifying where the dataframe should be saved
    cheat......boolean. True if studying the impulse response to a cheat.
    OUTPUT:
    The function does not return anything, but it saves a pandas data frame
    to the file 'saveto'. This file can be read by R and used to produce
    graphical output.
    '''
    nA = params['nA']
    A0 = eparams['a0']
    MU = eparams['mu']
    nash_actions = actions_dict(nA, A0, MU, firmlist, firmlist, "nash")
    mon_actions = actions_dict(nA, A0, MU, firmlist, firmlist, "monopoly")
    firmlist_cart = list(itertools.product(firmlist, firmlist))
    firm0 = firmlist_cart[0][0]
    firm1 = firmlist_cart[0][1]
    env = ContBertrand(firm0, firm1, eparams)
    agent.net.eval()
    df = []

    # Make econ variables
    dict_key = str((firm0, firm1))
    nash_action = nash_actions[dict_key]
    monopoly_action = mon_actions[dict_key]
    nash_profit = profit(nash_action, A0, MU, firm0, firm1, nA)
    monopoly_profit = profit(monopoly_action, A0, MU, firm0, firm1, nA)

    # Initiate new env and amTFT agent
    s_next = env.reset(firm0, firm1, eparams)
    done = False
    frame_idx = 0
    firm_idx = 0
    epsilon = 0
    obs_firm0 = np.array([0, 1])
    obs_firm1 = np.array([1, 0])
    # For sequentiality, I need to initiate action1
    action1 = agent.act(s_next[obs_firm1], 0)
    action0 = agent.act(s_next[obs_firm0], 0)
    for t in range(1, (len(firmlist_cart) + 1) * 1000):
        if done:
            # Episode over: move on to the next pair of firms
            firm0 = firmlist_cart[firm_idx][0]
            firm1 = firmlist_cart[firm_idx][1]
            firm_idx += 1
            # Make econ variables
            dict_key = str((firm0, firm1))
            nash_action = nash_actions[dict_key]
            monopoly_action = mon_actions[dict_key]
            nash_profit = profit(nash_action, A0, MU, firm0, firm1, nA)
            monopoly_profit = profit(monopoly_action, A0, MU, firm0, firm1, nA)
            # Initiate new env
            s_next = env.reset(firm0, firm1, eparams)
            done = False
        frame_idx += 1
        s = s_next
        # Sequentiality of action choices
        if frame_idx % 2 == 0:
            # Update only agent0's action
            action0 = agent.act(s[obs_firm0], epsilon)
        else:
            # Update only agent1's action
            action1 = agent.act(s[obs_firm1], epsilon)
        if cheat and frame_idx == 100:
            action0 = 1
        s_next, reward_n, done, _ = env.step(action0, action1)
        done = False  # Overwrite whatever may have come out before
        if frame_idx % 1000 == 0:
            done = True
        if reward_n is not None:
            df.append({'vertdiff0': firm0['quality'],
                       'vertdiff1': firm1['quality'],
                       'reward0': reward_n[0],
                       'reward1': reward_n[1],
                       'nash0': nash_profit[0],
                       'nash1': nash_profit[1],
                       'mon0': monopoly_profit[0],
                       'mon1': monopoly_profit[1],
                       'index': frame_idx,
                       'firm_index': firm_idx})
    df = pd.DataFrame(df)
    df.to_csv(saveto)
    return
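# A hypothetical call illustrating how this testing() variant might be used
# after training. The firmlist, params, and eparams contents below are
# assumptions based on how they are consumed above, not the repo's actual
# training configuration, and the Agent instance is assumed to exist already.
firmlist = [{'cost': 1, 'quality': q} for q in (1, 2, 3)]   # assumed firm grid

params = {'nA': 7}
eparams = {'a0': 2, 'mu': 0.5, 'randomness': 0.01}          # assumed values
# env.reset() also expects precomputed Nash actions in eparams
eparams['nash_actions'] = actions_dict(params['nA'], eparams['a0'], eparams['mu'],
                                       firmlist, firmlist, "nash")

agent = ...   # a trained Agent instance (construction depends on the training script)

# Plain test run
testing(firmlist, agent, params, eparams, saveto="test_results.csv", cheat=False)

# Impulse-response run: agent0 is forced to a deviating action at frame 100
testing(firmlist, agent, params, eparams, saveto="irf_results.csv", cheat=True)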