def fql_get_action(self, clients):
    others_below, serverload, client_load = self.get_fuzzy_variables(clients)
    firing = settings.FLS.get_rules_firing(others_below, serverload, client_load)
    index_action = dict()
    takagi_sugeno = 0
    denominator = sum(firing)
    fql_q_function = 0
    fql_v_function = 0
    # Only rules with a non-zero firing strength take part in the combination.
    indexes = [i for i, v in enumerate(firing) if v > 0.0]
    explore = getattr(self, f"fql_{settings.EXPLORATION}")
    for index in indexes:
        # rule index -> FQL action index
        index_action[index] = explore(index)
        Mlog.DEBUG("FIRING:", firing)
        Mlog.DEBUG("INDEX:", index)
        Mlog.DEBUG("action_index:", index_action[index], self.actions)
        Mlog.DEBUG("INDEXES:", len(firing), len(self.actions), index_action[index])
        # Takagi-Sugeno: firing-weighted average of each rule's chosen action.
        takagi_sugeno += self.actions[index_action[index]] * firing[index]
        # Q(s, a): value of the action actually chosen for this rule.
        fql_q_function += self.q_table[index][index_action[index]] * firing[index]
        # V(s): value of the greedy action for this rule (force_max=True).
        fql_v_function += self.q_table[index][explore(index, force_max=True)] * firing[index]
    takagi_sugeno /= denominator
    fql_q_function /= denominator
    fql_v_function /= denominator
    self.fql_q_function = fql_q_function
    self.fql_v_function = fql_v_function
    if self.first:
        self.first = False
    else:
        reward = self.get_reward(clients, self.old_action)
        if settings.STRATEGY.count("fsl"):
            # SARSA-style target: bootstrap on the value of the chosen action.
            delta_q = reward + self.GAMMA * self.fql_q_function - self.old_fql_q_function
        else:
            # Q-learning-style target: bootstrap on the greedy value.
            delta_q = reward + self.GAMMA * self.fql_v_function - self.old_fql_q_function
        for index in self.old_index_action:
            # It has to be causal! What gets updated is the previous q-value,
            # not the current one.
            self.q_table[index][self.old_index_action[index]] += delta_q * firing[index]
    self.old_fql_q_function = fql_q_function
    self.old_index_action = index_action
    self.old_action = takagi_sugeno
    return takagi_sugeno
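
# A minimal standalone sketch of the Takagi-Sugeno combination used above,
# with hypothetical firing strengths, action values and per-rule choices (none
# of these numbers come from the original code): the continuous output is the
# firing-weighted average of each active rule's chosen action.
firing = [0.6, 0.4]          # hypothetical rule firing strengths
actions = (-25.0, 25.0)      # hypothetical action values
chosen = {0: 1, 1: 0}        # hypothetical action index per active rule

denominator = sum(firing)
takagi_sugeno = sum(actions[chosen[i]] * firing[i] for i in chosen) / denominator
# (25.0 * 0.6 + (-25.0) * 0.4) / 1.0 == 5.0
assert abs(takagi_sugeno - 5.0) < 1e-9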
def action_map(self, _range=1):
    # Builds a symmetric action space of percentage steps ±(i * 100 / ACTIONS)
    # for i = 0.._range. Note that routing the values through a set means the
    # final tuple order is hash-dependent, not sorted; callers must always go
    # through the stored self.actions indices.
    self.actions = set()
    for i in range(_range + 1):
        a = i * 100 / settings.ACTIONS
        Mlog.INFO("ACTION: ", a)
        self.actions.add(a)
        self.actions.add(-a)
    self.actions = tuple(self.actions)
    Mlog.INFO('THE ACTIONS: ', self.actions)
    return self.actions
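
# A minimal standalone sketch of the same mapping, assuming a hypothetical
# ACTIONS = 4 in place of settings.ACTIONS; 0.0 and -0.0 compare equal, so the
# set keeps a single zero entry.
ACTIONS = 4  # hypothetical stand-in for settings.ACTIONS

def action_map_sketch(_range=2):
    actions = set()
    for i in range(_range + 1):
        a = i * 100 / ACTIONS  # percentage step for action level i
        actions.add(a)
        actions.add(-a)        # symmetric increase/decrease
    return tuple(actions)

# action_map_sketch(2) contains {-50.0, -25.0, 0.0, 25.0, 50.0}, in set order.
assert sorted(action_map_sketch(2)) == [-50.0, -25.0, 0.0, 25.0, 50.0]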
def fql_eep(self, index, force_max=False):
    """Exploration/Exploitation Process (epsilon-greedy)."""
    actions = self.q_table[index]
    if random.uniform(0, 1) < self.EPSILON and not force_max:
        # Explore: uniformly random action index.
        action = random.randint(0, len(actions) - 1)
    else:
        # Exploit: greedy (highest-valued) action.
        Mlog.DEBUG("Max action index: ", np.argmax(actions))
        action = np.argmax(actions)
    return action
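
# Standalone sketch of the epsilon-greedy split above, with a hypothetical
# q-row and EPSILON; with force_max=True the call degenerates to a pure
# argmax, which is how fql_get_action computes its greedy V-function term.
import random
import numpy as np

EPSILON = 0.1  # hypothetical exploration rate

def eep_sketch(q_row, force_max=False):
    if random.uniform(0, 1) < EPSILON and not force_max:
        return random.randint(0, len(q_row) - 1)  # explore uniformly
    return int(np.argmax(q_row))                  # exploit greedily

assert eep_sketch([0.2, 0.9, 0.1], force_max=True) == 1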
def sum_rate(self, num):
    if num > 0:
        num_str = Bcolors.change(Bcolors.OKBLUE, num)
    else:
        num_str = Bcolors.change(Bcolors.FAIL, num)
    Mlog.DEBUG('INCREASE BY: ', num_str)
    new_rate = int(self.get_rate()) + num
    #print(num, self.get_rate(), new_rate, self.bw)
    #if new_rate * 1000 >= self.bw:
    self.rapi.change_meter(self.dpid, self.id, new_rate)
def choose_action(self):
    """Make a decision based on what has been learned (choose from the Q-table)."""
    self.state = self.get_current_state()
    line = self.q_table[self.state - 1]
    Mlog.DEBUG([f"{i}:{line[i]}" for i in range(len(line))])
    if 0 in self.q_table[self.state - 1]:
        # Some entries are still unvisited: keep sampling.
        return self.sample_action()
    action = np.argmax(self.q_table[self.state - 1])
    if self.q_table[self.state - 1][action]:
        return action
    else:
        # Redundant after the zero check above, kept as a safety net.
        return self.sample_action()
def get_reward(self, clients, old_action):
    """Retrieve the reward for a state change."""
    reward = 0
    gain1 = self.client.get_gain1()
    #gain2 = self.client.get_gain2()
    self.client.update()
    #print(self.dc.load, self.dc.cap, self.actions[self.old_action])
    # The old_action parameter is immediately replaced by the stored value.
    if settings.STRATEGY in ('fql_step', 'fsl_step'):
        old_action = self.old_action
    else:
        old_action = self.actions[self.old_action]
    if self.dc.load >= self.dc.cap:
        # Server overloaded: punish increases, reward decreases.
        Mlog.INFO("SERVER GREATER", self.dc.load, self.dc.cap)
        if old_action >= 0:
            reward = -3  #*(self.dc.load/self.dc.cap)
            return reward
        else:
            reward = 1  #* (self.dc.load / self.dc.cap)
            return reward
    if self.client.nbw < 0.5 * self.client.bw:
        # Client far below its contracted bandwidth (under SLA).
        #Mlog.INFO('OLD ACTION', self.client.id, old_action)
        if old_action >= 0:
            reward = 0.7  #*self.client.nbw/self.client.bw
        else:
            #Mlog.INFO("NEGATIVE REWARD BY CLIENT UNDER SLA")
            reward = -4  #*self.client.nbw/self.client.bw
        return reward
    if self.client.nbw < self.client.bw:
        # Client slightly below its contracted bandwidth.
        if old_action >= 0:
            #print()
            reward = 0.5  #* self.client.nbw / self.client.bw
            return reward
        else:
            #Mlog.INFO("NEGATIVE REWARD BY CLIENT UNDER SLA 2")
            reward = -2  #* self.client.nbw / self.client.bw
            return reward
    #if self.SLA_PROBLEM:
    #    return -0.7*self.client.nbw/self.client.bw
    return -0.7 * self.client.nbw / self.client.bw
    # NOTE: the code below is unreachable; the return above always executes.
    reward += gain1
    if gain1 < 1:
        return -reward
    return reward
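
# Condensed sketch of the reward shape above as a pure function, assuming the
# same thresholds; (load, cap) describe the server, (nbw, bw) the client's
# measured vs. contracted bandwidth, and old_action >= 0 means "increase".
# Not part of the original class.
def reward_sketch(load, cap, nbw, bw, old_action):
    if load >= cap:            # server overloaded
        return -3 if old_action >= 0 else 1
    if nbw < 0.5 * bw:         # client far under its SLA
        return 0.7 if old_action >= 0 else -4
    if nbw < bw:               # client slightly under its SLA
        return 0.5 if old_action >= 0 else -2
    return -0.7 * nbw / bw     # client satisfied: discourage further increases

assert reward_sketch(load=900, cap=800, nbw=10, bw=10, old_action=1) == -3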
def get_current_state(self):
    if settings.STATE_TYPE == 'val':
        # OLD
        state = int(self.dc.load / (self.dc.cap / self.OBSERVATION_SPACE))
    elif settings.STATE_TYPE == 'diff':
        state = int(self.dc.load / (self.old_dc_load * self.dc.cap / self.OBSERVATION_SPACE))
        self.old_dc_load = self.dc.load
    elif settings.STATE_TYPE == 'fuzzy':
        cpu = psutil.cpu_percent()
        load = self.dc.load
        # Reuse the sampled values so the logged inputs match what the FLS
        # actually received (the original sampled psutil.cpu_percent() twice).
        state = int(self.fls.get_state(cpu=cpu, bw=load))
        Mlog.INFO('FUZZY INPUT: ', cpu, load, state)
    if state > self.OBSERVATION_SPACE:
        return self.dump_state(self.OBSERVATION_SPACE)
    return self.dump_state(state)
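
# Worked example of the 'val' binning above with hypothetical numbers: a
# server with cap = 1000 and OBSERVATION_SPACE = 10 yields buckets of width
# 100, so a load of 450 lands in state 4.
OBSERVATION_SPACE = 10  # hypothetical number of discrete states
cap, load = 1000, 450   # hypothetical capacity and current load
assert int(load / (cap / OBSERVATION_SPACE)) == 4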
def fql_softmax(self, index, force_max=False):
    """Exploration/Exploitation Process (softmax / Boltzmann selection)."""
    actions = self.q_table[index]
    if force_max:
        Mlog.DEBUG("Max action index: ", np.argmax(actions))
        return np.argmax(actions)
    # Boltzmann distribution: higher q-values get exponentially more weight;
    # TEMPERATURE controls how greedy the draw is.
    numerator = [math.exp(num / self.TEMPERATURE) for num in actions]
    denominator = sum(numerator)
    prob = [num / denominator for num in numerator]
    # Cumulative thresholds: probp[i] is the probability mass before action i.
    probp = [sum(prob[:i]) for i in range(len(prob))]
    # Roulette-wheel selection: keep the last index whose threshold is <= rand.
    # (enumerate replaces the original probp.index() lookup, which picked the
    # wrong index when two thresholds were equal.)
    rand = random.random()
    ret = 0
    for i, threshold in enumerate(probp):
        if rand >= threshold:
            ret = i
        else:
            break
    return ret
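
# An equivalent vectorized sketch of the softmax draw above, using numpy only;
# TEMPERATURE is a hypothetical stand-in for self.TEMPERATURE, and subtracting
# the max before exp() is a standard trick that avoids overflow without
# changing the distribution.
import numpy as np

TEMPERATURE = 0.5  # hypothetical

def softmax_draw_sketch(q_row):
    scaled = np.asarray(q_row, dtype=float) / TEMPERATURE
    scaled -= scaled.max()       # numerical stability; distribution unchanged
    prob = np.exp(scaled)
    prob /= prob.sum()
    return int(np.random.choice(len(prob), p=prob))

assert 0 <= softmax_draw_sketch([0.2, 0.9, 0.1]) <= 2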
def do_fuzzy_action(self, value):
    # 'value' is the defuzzified controller output in percent; the rate delta
    # is capped at 30% of MAX_SERVER_LOAD and divided by 1000, which appears
    # to convert it to the meter's rate unit (see sum_rate).
    increase_by = int((value / 100) * 0.3 * settings.MAX_SERVER_LOAD / 1000)
    Mlog.DEBUG("DO FUZZY ACTION, INCREASE BY =", increase_by)
    self.client.sum_rate(increase_by)
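
# Worked example with hypothetical numbers: a defuzzified value of 50 (%) and
# MAX_SERVER_LOAD = 1_000_000 give a delta of
# int((50 / 100) * 0.3 * 1_000_000 / 1000) = int(150.0) = 150.
MAX_SERVER_LOAD = 1_000_000  # hypothetical stand-in for settings.MAX_SERVER_LOAD
value = 50
assert int((value / 100) * 0.3 * MAX_SERVER_LOAD / 1000) == 150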