def determineRewards(BSs, Agents, actions, variables, t=-1, t_cutoff=-1):
    """Return one reward per agent for the actions taken in this step.

    Args:
        BSs: Sequence of base stations, indexed by the values in ``actions``.
        Agents: Sequence of agents; ``actions[i]`` is the choice of ``Agents[i]``.
        actions: Per-agent index of the chosen base station.
        variables: Configuration passed through to the helper calls.
        t: Forwarded to ``determineWhichBSTransmitting``.
        t_cutoff: Forwarded to ``determineWhichBSTransmitting``.

    Returns:
        A list with ``len(Agents)`` entries. An entry is 0 when the chosen
        base station is not transmitting; otherwise it is the value returned
        by ``environment.calculatecapacity`` for that agent and base station.
    """
    # Per-BS transmit indicator; an entry equal to 0 means "not transmitting".
    transmitting = determineWhichBSTransmitting(BSs, variables, t, t_cutoff)
    # Number of agents that picked each base station.
    load = Counter(actions)

    rewards = []
    for agent_idx, agent in enumerate(Agents):
        chosen = actions[agent_idx]
        if transmitting[chosen] != 0:
            # The capacity call receives the count of agents that chose the same BS.
            rewards.append(
                environment.calculatecapacity(
                    agent, BSs[chosen], load[chosen], variables, doFading=False
                )
            )
        else:
            # Chosen BS is silent: the agent earns nothing this step.
            rewards.append(0)
    return rewards
 def act(self, BSs, variables,Agents=None, t = -1):
     """Pick the base station with the larger coexistence-weighted capacity.

     The capacity to ``BSs[0]`` is scaled by ``variables['K_coexistence']``
     and the capacity to ``BSs[1]`` by its complement. The index of the
     larger product is appended to ``self.actions`` and returned.
     ``Agents`` and ``t`` are accepted but not used here.
     """
     # Capacity towards each of the two base stations; 1 is passed as the
     # user-count argument (presumably: no other users sharing the BS).
     solo_first = environment.calculatecapacity(self, BSs[0], 1, variables, doFading=False)
     solo_second = environment.calculatecapacity(self, BSs[1], 1, variables, doFading=False)

     # Weight by K for the first BS and by (1 - K) for the second.
     k = variables['K_coexistence']
     weighted = [solo_first * k, solo_second * (1 - k)]

     # Index of the maximising base station.
     choice = np.argmax(weighted)

     self.actions.append(choice)
     return choice
 def act(self, BSs, variables,Agents=None, t = -1):
     """Choose a base station: weighted-capacity pick early, learning later.

     While ``t`` is at most ``floor(variables['T_cutoff'] / 2)`` the agent
     picks the base station maximising its capacity weighted by
     ``variables['K_coexistence']`` (first BS) or its complement (second BS).
     After that point it exploits with probability
     ``1 - variables['p_explore']`` by picking the base station with the
     highest average past reward, and otherwise picks uniformly at random.

     Args:
         BSs: Sequence of base stations; actions are indices into it.
         variables: Dict providing ``'T_cutoff'``, ``'p_explore'`` and
             ``'K_coexistence'``.
         Agents: Unused; kept for interface compatibility.
         t: Current time step.

     Returns:
         The chosen base-station index, which is also appended to
         ``self.actions``.
     """
     Tpatience = math.floor(variables['T_cutoff']/2)
     if t > Tpatience:     # above T_patience: basic learning
         p_exploit = 1 - variables['p_explore']
         if random.random() < p_exploit:
             avgOfEach = np.zeros(len(BSs))
             for i in range(len(BSs)):
                 # Rewards received on the steps where base station i was chosen.
                 past = [self.rewards[ind] for ind, chosen in enumerate(self.actions) if chosen == i]
                 # A never-tried BS gets +inf so it is tried at least once.
                 # Use np.inf: the np.Inf alias was removed in NumPy 2.0.
                 avgOfEach[i] = sum(past) / float(len(past)) if past else np.inf
             action = np.argmax(avgOfEach)
         else:
             # Explore: uniform choice over all base stations (bounds inclusive).
             action = random.randint(0, len(BSs)-1)
     else:     # up to T_patience: maximise K*C without other users
         # Capacity to each of the two base stations with a user count of 1.
         caps = [environment.calculatecapacity(self, BSs[0], 1, variables, doFading=False), environment.calculatecapacity(self, BSs[1], 1, variables, doFading=False)]
         # Weight by K and 1-K respectively.
         caps[0] = caps[0] * variables['K_coexistence']
         caps[1] = caps[1] * (1-(variables['K_coexistence']))
         # Choose the maximising base station.
         action = np.argmax(caps)

     self.actions.append(action)
     return action
Esempio n. 4
0
            else:
                AgentRewards[i] = AgentRewards[i] + np.array(Agents[i].rewards)
                AgentActions[i] = AgentActions[i] + np.array(Agents[i].actions)
    foundcorre = False
    if PLOTNEQ:
        AllMixedStrategyRewards = []
        AllMixedStrategyActions = []

        #        MixedStrategyRewards = np.zeros(variables['NumAgents'])
        CorrelatedEquilRewards = np.zeros(variables["NumAgents"])
        #        MixedStrategyActions = np.zeros(variables['NumAgents'])
        CorrelatedEquilActions = np.zeros(variables["NumAgents"])
        C = np.zeros((2, 2))
        for i in [0, 1]:
            for j in [0, 1]:
                C[i, j] = environment.calculatecapacity(Agents[i], BSs[j], 1, variables, doFading=False)

        for strat in analysis_helper.findPossibleStrategies(C, variables["K_coexistence"]):
            AllMixedStrategyRewards.append(
                [
                    analysis_helper.calcUtilityFromStrategy(0, strat, variables["K_coexistence"], C),
                    analysis_helper.calcUtilityFromStrategy(1, strat, variables["K_coexistence"], C),
                ]
            )
            AllMixedStrategyActions.append([strat[0][1], strat[1][1]])

        #       bestmixed = analysis_helper.findBestMixedStrategy(C, variables["K_coexistence"])
        #       MixedStrategyRewards = MixedStrategyRewards + [analysis_helper.calcUtilityFromStrategy(0, bestmixed, variables["K_coexistence"], C), analysis_helper.calcUtilityFromStrategy(1, bestmixed, variables["K_coexistence"], C)]
        #       MixedStrategyActions = MixedStrategyActions + [bestmixed[0][1], bestmixed[1][1]]

        foundcorre, corstrat, corrrewardsloc = analysis_helper.calc_correlated_equil(C, variables["K_coexistence"])