Example #1
    def reset(self, nA, a0, mu, firm0, firm1, gamma):
        self.b = 0  # start in the cooperative phase (b counts remaining punishment periods)
        self.W = 0  # payoff balance starts at 0

        self.c_act = colab_actions[str((firm0, firm1))][0]
        self.c_act1 = colab_actions[str((firm0, firm1))][1]
        self.d_act = nash_actions[str((firm0, firm1))][0]
        self.d_act1 = nash_actions[str((firm0, firm1))][1]

        self.a0 = a0
        self.mu = mu
        self.firm0 = firm0
        self.firm1 = firm1
        self.nA = nA
        self.total_pg = []
        act_colab = np.array([self.c_act, self.c_act1])
        cheat_act = np.array([self.c_act, self.d_act1])
        self.profit1_cc = profit(act_colab, self.a0, self.mu, self.firm0,
                                 self.firm1, self.nA)[1]
        self.Q1_cc = gamma * self.profit1_cc / (
            1 - gamma)  # infinite sum of discounted p_cc starting next period

        self.profit1_cd = profit(cheat_act, self.a0, self.mu, self.firm0,
                                 self.firm1, self.nA)[1]
        self.T = 0.1 * (
            self.profit1_cd - self.profit1_cc
        )  # Threshold for how much profit gain rival can gather before amTFT punishes
        self.punishlen = punishlen
        return
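The two derived quantities in reset() have a simple closed form: Q1_cc is the rival's discounted stream of cooperative profits starting next period, gamma * profit1_cc / (1 - gamma), and T is 10% of the one-shot gain from deviating. A minimal numeric check of that identity, with made-up profit and discount values rather than anything taken from the model:

import numpy as np

gamma = 0.95       # hypothetical discount factor
profit1_cc = 1.0   # hypothetical per-period profit of the rival under cooperation
profit1_cd = 1.4   # hypothetical one-shot profit of the rival when it deviates

# Closed form used in reset(): sum over t >= 1 of gamma**t * profit1_cc
Q1_cc = gamma * profit1_cc / (1 - gamma)

# Truncated version of the same geometric series, for comparison
Q1_cc_series = sum(gamma ** t * profit1_cc for t in range(1, 2000))
assert np.isclose(Q1_cc, Q1_cc_series)

# Punishment threshold: 10% of the one-shot deviation gain
T = 0.1 * (profit1_cd - profit1_cc)
print(Q1_cc, T)   # approximately 19.0 and 0.04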
Example #2
 def reset(self, firm0, firm1, eparams):
     action0 = self.np_random.uniform(low=0, high=nA)
     action1 = self.np_random.uniform(low=0, high=nA)
     #self.vert0 = firm0['quality']
     #self.vert1 = firm1['quality']
     self.firm0 = firm0
     self.firm1 = firm1
     self.nash_act = eparams['nash_actions'][str((self.firm0, self.firm1))]
     self.baseline = profit(self.nash_act, A0, MU, self.firm0, self.firm1, nA)
     self.randomness = eparams['randomness'] * self.baseline 
     self.state = np.array([action0, action1]) 
     return np.array(self.state)
Example #3
 def step(self, action0, action1):
     # action made by the "meta agent", i.e. all market participants' joint action
     action = np.array([action0, action1])
     #assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action)) # TODO: make this work (threw assertionerror for valid actions)
     a0 = A0 + np.random.normal(0, 1) * self.randomness
     reward = profit(action, a0 = a0, mu = MU, firm0 = self.firm0, firm1 = self.firm1, nA = nA)
     # subtract the Nash baseline (the demand shock entered above through a0)
     reward = reward - self.baseline
     # new state
     self.state = np.array([action[0], action[1]])
     # the episode ends with probability p_end in each period
     done = np.random.random() < p_end
     return self.state, reward, done, {}
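The termination rule in step() ends an episode with probability p_end in every period, so episode lengths are geometric with mean 1/p_end. A small sketch confirming this, with an illustrative p_end that is not the project's setting:

import numpy as np

p_end = 0.02                      # illustrative termination probability
rng = np.random.default_rng(0)

def episode_length():
    t = 0
    while True:
        t += 1
        if rng.random() < p_end:   # same rule as in step()
            return t

lengths = [episode_length() for _ in range(20000)]
print(np.mean(lengths), 1 / p_end)   # both close to 50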
Example #4
    def reset(self, firm0, firm1):
        min_profit = -0.1
        max_profit = 0.1

        profit0 = self.np_random.uniform(low=min_profit, high=max_profit)
        action0 = self.np_random.uniform(low=0, high=nA)
        profit1 = self.np_random.uniform(low=min_profit, high=max_profit)
        action1 = self.np_random.uniform(low=0, high=nA)
        self.vert0 = firm0['quality']
        self.vert1 = firm1['quality']
        self.firm0 = firm0
        self.firm1 = firm1
        self.nash_act = eparams['nash_actions'][str((self.firm0, self.firm1))]
        self.baseline = profit(self.nash_act, A0, MU, self.firm0, self.firm1,
                               nA)

        self.state = np.array(
            [profit0, action0, profit1, action1, self.vert0, self.vert1])
        return np.array(self.state)
Example #5
    def act(self, act1):
        meta_act = np.array([self.c_act, act1])
        r1 = profit(meta_act, self.a0, self.mu, self.firm0, self.firm1,
                    self.nA)[1]

        # Per-period gain of the rival relative to the cooperative payoff;
        # the continuation values Q1_cc cancel, leaving r1 - profit1_cc.
        deviation = r1 + self.Q1_cc - self.profit1_cc - self.Q1_cc
        deviation = max(deviation, 0)  # negative deviations earn no credit

        if self.b == 0:
            act = self.c_act
            self.W = self.W + deviation
        else:
            act = self.d_act
            self.b = self.b - 1

        if self.W > self.T:
            self.b = self.punishlen
            self.W = 0
        return act
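The amTFT bookkeeping in act() can be summarised as: cooperate while accumulating the rival's deviation gains in W; once W exceeds the threshold T, defect for punishlen periods and reset W. A stripped-down sketch of that logic, with hypothetical numbers in place of the model's profit function:

c_act, d_act = 3, 0        # hypothetical cooperative and punishment actions
T, punishlen = 0.05, 5     # hypothetical threshold and punishment length
W, b = 0.0, 0              # payoff balance and remaining punishment periods

# Rival's per-period deviation gain (r1 - profit1_cc in act()), made up here
rival_gains = [0.0, 0.0, 0.04, 0.04, 0.0, 0.0]

for gain in rival_gains:
    deviation = max(gain, 0)   # no negative credits
    if b == 0:
        act = c_act            # cooperative phase
        W += deviation
    else:
        act = d_act            # punishment phase
        b -= 1
    if W > T:                  # rival has gained too much: start punishing
        b, W = punishlen, 0.0
    print(act, round(W, 3), b)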
Example #6
import itertools

import numpy as np
import pandas as pd

from calc_nash_monopoly import profit, nash_action, monopoly_action

# ECONPARAMS is assumed to be defined or imported elsewhere in the project
econ_params = ECONPARAMS['base_case']

a0 = econ_params['a0']
mu = econ_params['mu']
nA = 7
firm0 = {'cost': 1, 'quality': 2}
firm1 = {'cost': 1, 'quality': 2}

acts = itertools.product(np.arange(nA), np.arange(nA))
profit_df = np.zeros((nA * nA, 5))
ix = 0
nash_act = nash_action(nA, a0, mu, firm0, firm1)
monopoly_act = monopoly_action(nA, a0, mu, firm0, firm1)
nash_profit = profit(nash_act, a0, mu, firm0, firm1, nA)[0]
monopoly_profit = profit(monopoly_act, a0, mu, firm0, firm1, nA)[0]

for act in acts:
    action = np.array(act)
    profit_0 = profit(action, a0, mu, firm0, firm1, nA)[0]
    profit_df[ix][0] = action[0]
    profit_df[ix][1] = action[1]
    profit_df[ix][2] = profit_0
    profit_df[ix][3] = nash_profit
    profit_df[ix][4] = monopoly_profit
    ix = ix + 1

df = pd.DataFrame(profit_df, columns=['action0', 'action1', 'profit0',
                                      'nash_profit', 'monopoly_profit'])
df.to_csv("actact_profit.csv")
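The resulting file holds one row per action pair. One way to inspect it, assuming the column names written above, is to pivot firm 0's profit into an nA x nA payoff matrix:

import pandas as pd

payoffs = pd.read_csv("actact_profit.csv", index_col=0)
# Rows: firm 0's action, columns: firm 1's action, cells: firm 0's profit
payoff_matrix = payoffs.pivot(index="action0", columns="action1", values="profit0")
print(payoff_matrix.round(3))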
Example #7
def testing(firmlist, agent, params, eparams, saveto):
    '''
    testing runs a trained agent through a set of markets and saves a
    dataframe of results to a file.
    An instance of class Agent is tested in an economic environment determined
    by firmlist, params, and eparams. Testing works by exposing the agent to
    all combinations of firms present in the firmlist argument; results are
    measured by recording the profit as well as the reference values used to
    judge how good that profit is. The recorded values are appended as
    dictionaries to a list that is then turned into a pandas data frame, which
    I export to the .csv file specified by the argument saveto.
    INPUT:
        firmlist...a list of combinations of firms
        agent......object of class Agent with an already trained network
        params.....dict of hyperparameters relating to code mechanics
        eparams....dict of hyperparameters relating to the economic environment
        saveto.....string specifying where the dataframe should be saved
    OUTPUT:
        The function does not return anything; it saves a pandas data frame to
        the file 'saveto', which can be read by R and used to produce
        graphical output.
    '''
    nA = params['nA']
    A0 = eparams['a0']
    MU = eparams['mu']
    firm0 = firmlist[0][0]
    firm1 = firmlist[0][1]
    env = ContBertrand(firm0, firm1)
    agent.net.eval()
    df = []
    
    # Make econ variables
    dict_key = str((firm0, firm1))
    nash_action = eparams['nash_actions'][dict_key]
    monopoly_action = eparams['monopoly_actions'][dict_key]
    nash_profit = profit(nash_action, A0, MU, firm0, firm1, nA)
    monopoly_profit = profit(monopoly_action, A0, MU, firm0, firm1, nA)
    
    # Initiate new env and amTFT agent
    s_next = env.reset(firm0, firm1)
    done = False
    frame_idx = 0
    firm_ix = 0
    for t in range(1, (len(firmlist)+1)*1000):
        if done:
            # Save episodal reward
            # TODO: expand set of firms
            firm0 = firmlist[firm_ix][0]
            firm1 = firmlist[firm_ix][1]
            firm_ix += 1
            # Make econ variables
            dict_key = str((firm0, firm1))
            nash_action = eparams['nash_actions'][dict_key]
            monopoly_action = eparams['monopoly_actions'][dict_key]
            nash_profit = profit(nash_action, A0, MU, firm0, firm1, nA)
            monopoly_profit = profit(monopoly_action, A0, MU, firm0, firm1, nA)
            
            # Initiate new env and amTFT agent
            s_next = env.reset(firm0, firm1)
            done = False
            
        frame_idx += 1
        epsilon = 0
        s = s_next
        
        action0 = agent.act(s[np.array([0,1,4,5])], epsilon)
        action1 = agent.act(s[np.array([2,3,4,5])], epsilon)
        s_next, reward_n, done, _ = env.step(action0, action1)
        done = False # Overwrite whatever may have come out before
        if frame_idx % 1000 == 0:
          done = True
        
        if reward_n is not None:
            reward = reward_n[0]
            pg = reward
            #pg = profit_gain(reward, nash_profit, colab_profit)[0] # important to index here
            agent.total_pg.append(pg)
            df.append({'vertdiff0': firm0['quality'], 'vertdiff1': firm1['quality'], 
                   'reward0': reward_n[0], 'reward1': reward_n[1], 'nash0': nash_profit[0],
                   'nash1': nash_profit[1], 'mon0': monopoly_profit[0], 'mon1': monopoly_profit[1]})
    
    df = pd.DataFrame(df)
    df.to_csv(saveto)
    return
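The commented-out profit_gain call above hints at normalising rewards against the Nash and collusive benchmarks. As a sketch of how the saved CSV could be post-processed, the columns written by testing() allow the common normalisation (reward - nash) / (monopoly - nash); whether profit_gain computes exactly this is an assumption, and the file name below is a placeholder for whatever was passed as saveto:

import pandas as pd

results = pd.read_csv("test_results.csv", index_col=0)   # placeholder file name
# Normalised profit gain: 0 at the Nash profit, 1 at the monopoly profit
results["pg0"] = (results["reward0"] - results["nash0"]) / (results["mon0"] - results["nash0"])
results["pg1"] = (results["reward1"] - results["nash1"]) / (results["mon1"] - results["nash1"])
print(results.groupby(["vertdiff0", "vertdiff1"])[["pg0", "pg1"]].mean())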
Example #8
def testing(firmlist, agent, params, eparams, saveto, cheat):
    '''
    testing runs a trained agent through a set of markets and saves a
    dataframe of results to a file.
    An instance of class Agent is tested in an economic environment determined
    by firmlist, params, and eparams. Testing works by exposing the agent to
    all combinations of firms present in the firmlist argument; results are
    measured by recording the profit as well as the reference values used to
    judge how good that profit is. The recorded values are appended as
    dictionaries to a list that is then turned into a pandas data frame, which
    I export to the .csv file specified by the argument saveto.
    INPUT:
        firmlist...a list of combinations of firms
        agent......object of class Agent with an already trained network
        params.....dict of hyperparameters relating to code mechanics
        eparams....dict of hyperparameters relating to the economic environment
        saveto.....string specifying where the dataframe should be saved
        cheat......boolean; True when studying the impulse response to a
                   one-off cheat
    OUTPUT:
        The function does not return anything; it saves a pandas data frame to
        the file 'saveto', which can be read by R and used to produce
        graphical output.
    '''

    nA = params['nA']
    A0 = eparams['a0']
    MU = eparams['mu']
    
    nash_actions = actions_dict(nA, A0, MU, firmlist, firmlist, "nash")
    mon_actions = actions_dict(nA, A0, MU, firmlist, firmlist, "monopoly")
    
    firmprod = itertools.product(firmlist, firmlist)
    firmlist_cart = []
    for element in firmprod:
        firmlist_cart.append(element)
    

    firm0 = firmlist_cart[0][0]
    firm1 = firmlist_cart[0][1]
    env = ContBertrand(firm0, firm1, eparams)
    agent.net.eval()
    df = []
    
    # Make econ variables
    dict_key = str((firm0, firm1))
    nash_action = nash_actions[dict_key]
    monopoly_action = mon_actions[dict_key]
    nash_profit = profit(nash_action, A0, MU, firm0, firm1, nA)
    monopoly_profit = profit(monopoly_action, A0, MU, firm0, firm1, nA)
    
    # Initiate new env and amTFT agent
    s_next = env.reset(firm0, firm1, eparams)
    done = False
    frame_idx = 0
    firm_idx = 0
    epsilon = 0
    # observation indices: each firm sees the state with its own action first
    obs_firm0 = np.array([0, 1])
    obs_firm1 = np.array([1, 0])
    
    # For sequential updating, I need to initialise both actions
    action1 = agent.act(s_next[obs_firm1], 0)
    action0 = agent.act(s_next[obs_firm0], 0)
    for t in range(1, (len(firmlist_cart)+1)*1000):
        if done:
            # Save episodal reward
            firm0 = firmlist_cart[firm_idx][0]
            firm1 = firmlist_cart[firm_idx][1]
            firm_idx += 1
            # Make econ variables
            dict_key = str((firm0, firm1))
            nash_action = nash_actions[dict_key]
            monopoly_action = mon_actions[dict_key]
            nash_profit = profit(nash_action, A0, MU, firm0, firm1, nA)
            monopoly_profit = profit(monopoly_action, A0, MU, firm0, firm1, nA)
            
            # Initiate new env
            s_next = env.reset(firm0, firm1, eparams)
            done = False
            
        frame_idx += 1
        s = s_next
        
        # Sequentiality of action choices
        if frame_idx % 2 == 0:
            # Update only agent0's action
            action0 = agent.act(s[obs_firm0], epsilon)
        else:
            # Update only agent1's action
            action1 = agent.act(s[obs_firm1], epsilon)
        
        if cheat and frame_idx == 100:
            # one-off deviation by firm 0; frame_idx never resets, so this
            # only affects the first firm pair's episode
            action0 = 1
        
        s_next, reward_n, done, _ = env.step(action0, action1)
        done = False # Overwrite whatever may have come out before
        if frame_idx % 1000 == 0:
          done = True
        
        if reward_n is not None:
            df.append({'vertdiff0': firm0['quality'], 'vertdiff1': firm1['quality'], 
                   'reward0': reward_n[0], 'reward1': reward_n[1], 
                   'nash0': nash_profit[0], 'nash1': nash_profit[1], 
                   'mon0': monopoly_profit[0], 'mon1': monopoly_profit[1],
                   'index': frame_idx, 'firm_index': firm_idx})
    
    df = pd.DataFrame(df)
    df.to_csv(saveto)
    return
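According to the docstring, the saved CSVs are meant to be read into R for graphics; purely as a quick Python sketch, the one-off deviation at frame_idx == 100 could be inspected as an impulse response like this (the file name and plotting window are placeholders, and the run is assumed to have used cheat=True):

import pandas as pd
import matplotlib.pyplot as plt

irf = pd.read_csv("irf_cheat.csv", index_col=0)   # placeholder file name
# Keep the first firm pair's episode and a window around the forced deviation
window = irf[(irf["firm_index"] == 0) & irf["index"].between(80, 160)]
plt.plot(window["index"], window["reward0"], label="firm 0")
plt.plot(window["index"], window["reward1"], label="firm 1")
plt.axvline(100, linestyle="--", color="grey")    # period of the forced deviation
plt.legend()
plt.savefig("irf_cheat.png")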