def determineRewards(BSs, Agents, actions, variables, t=-1, t_cutoff=-1):
    x = determineWhichBSTransmitting(BSs, variables, t, t_cutoff)
    Ntx = Counter(actions)  # find number of UEs that chose each BS
    rewards = [0] * len(Agents)
    for i in range(0, len(Agents)):
        if x[actions[i]] == 0:  # BS it is connected to is not transmitting
            continue
        rewards[i] = environment.calculatecapacity(Agents[i], BSs[actions[i]], Ntx[actions[i]], variables, doFading=False)
    return rewards
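# Illustrative only: a minimal, self-contained sketch of the reward flow above,
# with the repo's determineWhichBSTransmitting and environment.calculatecapacity
# replaced by hypothetical stand-ins (a fixed transmit mask and an equal-split
# capacity). The names and numbers below are assumptions, not part of the simulator.
from collections import Counter

def _toy_capacity(total_capacity, n_sharing):
    # hypothetical stand-in: capacity shared equally among co-scheduled UEs
    return total_capacity / n_sharing

def toy_determine_rewards(actions, transmitting, bs_capacity):
    counts = Counter(actions)                 # UEs per BS
    rewards = [0.0] * len(actions)
    for i, a in enumerate(actions):
        if transmitting[a] == 0:              # chosen BS is silent this slot
            continue
        rewards[i] = _toy_capacity(bs_capacity[a], counts[a])
    return rewards

# e.g. two BSs, only BS 0 transmitting, three UEs choosing [0, 0, 1]
print(toy_determine_rewards([0, 0, 1], transmitting=[1, 0], bs_capacity=[10.0, 8.0]))
# -> [5.0, 5.0, 0.0]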
def act(self, BSs, variables, Agents=None, t=-1):
    # calculate C to each BS
    caps = [environment.calculatecapacity(self, BSs[0], 1, variables, doFading=False),
            environment.calculatecapacity(self, BSs[1], 1, variables, doFading=False)]
    # multiply by K, 1-K
    caps[0] = caps[0] * variables['K_coexistence']
    caps[1] = caps[1] * (1 - variables['K_coexistence'])
    # choose maximising
    action = np.argmax(caps)
    self.actions.append(action)
    return action
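# Worked example with illustrative numbers (not from the simulator): if the
# single-user capacities are caps = [10.0, 8.0] and K_coexistence = 0.6, the
# weighted values are [10.0 * 0.6, 8.0 * 0.4] = [6.0, 3.2], so argmax selects
# BS 0. K_coexistence appears to act as the share of airtime granted to BS 0,
# with 1 - K left for BS 1.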
def act(self, BSs, variables, Agents=None, t=-1):
    Tpatience = math.floor(variables['T_cutoff'] / 2)
    if t > Tpatience:
        # above T_patience, do basic learning: exploit the empirically best BS
        # with probability 1 - p_explore, otherwise explore uniformly at random
        p = 1 - variables['p_explore']
        if random.random() < p:
            avgOfEach = np.zeros(len(BSs))
            for i in range(0, len(BSs)):
                indices = [ind for ind, j in enumerate(self.actions) if j == i]
                # an untried BS gets infinite value so it is tried at least once
                avgOfEach[i] = np.inf if len(indices) == 0 else sum([self.rewards[j] for j in indices]) / float(len(indices))
            action = np.argmax(avgOfEach)
        else:
            action = random.randint(0, len(BSs) - 1)
    else:
        # below T_patience, choose the BS that would maximise K*C without other users
        # calculate C to each BS
        caps = [environment.calculatecapacity(self, BSs[0], 1, variables, doFading=False),
                environment.calculatecapacity(self, BSs[1], 1, variables, doFading=False)]
        # multiply by K, 1-K
        caps[0] = caps[0] * variables['K_coexistence']
        caps[1] = caps[1] * (1 - variables['K_coexistence'])
        # choose maximising
        action = np.argmax(caps)
    self.actions.append(action)
    return action
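# A minimal, self-contained sketch of the learning rule used above T_patience:
# epsilon-greedy selection over the empirical mean reward of each BS. The helper
# name, toy histories, and values below are illustrative assumptions, not from
# the repo.
import random
import numpy as np

def pick_bs(action_history, reward_history, n_bs, p_explore, rng=random):
    if rng.random() < p_explore:
        return rng.randint(0, n_bs - 1)        # explore uniformly
    means = np.full(n_bs, np.inf)              # untried BSs look optimal
    for bs in range(n_bs):
        r = [reward_history[k] for k, a in enumerate(action_history) if a == bs]
        if r:
            means[bs] = sum(r) / len(r)        # empirical mean reward
    return int(np.argmax(means))

# e.g. BS 0 has averaged 3.0, BS 1 has averaged 5.0 -> exploitation picks BS 1
print(pick_bs([0, 1, 0, 1], [2.0, 6.0, 4.0, 4.0], n_bs=2, p_explore=0.0))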
    else:
        AgentRewards[i] = AgentRewards[i] + np.array(Agents[i].rewards)
        AgentActions[i] = AgentActions[i] + np.array(Agents[i].actions)

foundcorre = False
if PLOTNEQ:
    AllMixedStrategyRewards = []
    AllMixedStrategyActions = []
    # MixedStrategyRewards = np.zeros(variables['NumAgents'])
    CorrelatedEquilRewards = np.zeros(variables["NumAgents"])
    # MixedStrategyActions = np.zeros(variables['NumAgents'])
    CorrelatedEquilActions = np.zeros(variables["NumAgents"])
    # single-user capacity of each agent (row) to each BS (column)
    C = np.zeros((2, 2))
    for i in [0, 1]:
        for j in [0, 1]:
            C[i, j] = environment.calculatecapacity(Agents[i], BSs[j], 1, variables, doFading=False)
    for strat in analysis_helper.findPossibleStrategies(C, variables["K_coexistence"]):
        AllMixedStrategyRewards.append([
            analysis_helper.calcUtilityFromStrategy(0, strat, variables["K_coexistence"], C),
            analysis_helper.calcUtilityFromStrategy(1, strat, variables["K_coexistence"], C),
        ])
        AllMixedStrategyActions.append([strat[0][1], strat[1][1]])
    # bestmixed = analysis_helper.findBestMixedStrategy(C, variables["K_coexistence"])
    # MixedStrategyRewards = MixedStrategyRewards + [analysis_helper.calcUtilityFromStrategy(0, bestmixed, variables["K_coexistence"], C), analysis_helper.calcUtilityFromStrategy(1, bestmixed, variables["K_coexistence"], C)]
    # MixedStrategyActions = MixedStrategyActions + [bestmixed[0][1], bestmixed[1][1]]
    foundcorre, corstrat, corrrewardsloc = analysis_helper.calc_correlated_equil(C, variables["K_coexistence"])