def update(self, feedback: Feedback):
    """Advance the two-stage elimination loop with one round of feedback.

    The learner alternates between two stages:

    * ``'main_loop'``: absorb the rewards of the main-loop active arms,
      then initialize a median-elimination (ME) subroutine with fresh
      statistics and a per-round epsilon/delta schedule.
    * ``'median_elimination'``: absorb rewards for the ME arms; while more
      than ``__threshold`` arms remain, drop every arm below the median
      empirical mean and tighten the schedule.  Otherwise take ME's best
      arm as an estimated epsilon-best arm, eliminate main-loop arms that
      are more than ``__eps_r`` below it, and return to ``'main_loop'``
      with a halved epsilon and tightened confidence.

    Args:
        feedback: rewards of the arms pulled by the last actions.
    """
    if self.__stage == 'main_loop':
        for arm_feedback in feedback.arm_feedbacks:
            self.__active_arms[arm_feedback.arm.id].update(
                np.array(arm_feedback.rewards))

        # Initialization of median elimination
        self.__stage = 'median_elimination'
        # Per-round schedule for the first ME round.
        self.__me_eps_ell = self.__eps_r / 8
        self.__me_log_delta_ell = self.__log_delta_r - math.log(2)
        # Total accuracy / confidence budget handed to the ME subroutine.
        self.__me_eps_left = self.__eps_r / 2
        self.__me_delta_left = math.exp(self.__log_delta_r)
        # ME keeps its statistics separate from the main loop's arms.
        self.__me_active_arms = dict()
        for arm_id in self.__active_arms:
            self.__me_active_arms[arm_id] = PseudoArm()
    elif self.__stage == 'median_elimination':
        for arm_feedback in feedback.arm_feedbacks:
            self.__me_active_arms[arm_feedback.arm.id].update(
                np.array(arm_feedback.rewards))
        if len(self.__me_active_arms) > self.__threshold:
            # Drop every ME arm whose empirical mean falls below the median.
            median = np.median(
                np.array([
                    pseudo_arm.em_mean
                    for (arm_id, pseudo_arm) in self.__me_active_arms.items()
                ]))
            # Iterate over a copy since entries are deleted during the loop.
            for (arm_id, pseudo_arm) in list(self.__me_active_arms.items()):
                if pseudo_arm.em_mean < median:
                    del self.__me_active_arms[arm_id]
            # Tighten the schedule for the next ME round.
            self.__me_eps_left *= 0.75
            self.__me_delta_left *= 0.5
            self.__me_eps_ell *= 0.75
            self.__me_log_delta_ell -= math.log(2)
        else:
            # Best arm returned by median elimination
            best_arm_by_me = argmax_or_min_tuple([
                (pseudo_arm.em_mean, arm_id)
                for arm_id, pseudo_arm in self.__me_active_arms.items()
            ])

            # Second half of 'main_loop'
            # Use estimated epsilon-best-arm to do elimination
            for (arm_id, pseudo_arm) in list(self.__active_arms.items()):
                if pseudo_arm.em_mean < self.__active_arms[
                        best_arm_by_me].em_mean - self.__eps_r:
                    del self.__active_arms[arm_id]
            if len(self.__active_arms) == 1:
                self.__best_arm = list(self.__active_arms.keys())[0]
            # Return to the main loop with a halved epsilon and a delta
            # that shrinks polynomially in the round index.
            self.__stage = 'main_loop'
            self.__round += 1
            self.__eps_r /= 2
            self.__log_delta_r = math.log(
                (1 - self.confidence) / 50) - 3 * math.log(self.__round)
def update(self, feedback: Feedback):
    """Record the newest reward sample; after the exploration horizon
    of ``__T_prime`` pulls, commit to the empirically best arm.

    Args:
        feedback: reward of the single arm pulled by the last action.
    """
    latest = feedback.arm_feedbacks[0]
    self.__pseudo_arms[latest.arm.id].update(np.array(latest.rewards))
    self.__time += 1

    # Commit exactly once: the best arm is still unset (negative
    # sentinel) and the exploration horizon has just been exceeded.
    if self.__best_arm < 0 and self.__time > self.__T_prime:
        candidates = [(self.__pseudo_arms[arm_id].em_mean, arm_id)
                      for arm_id in range(self.arm_num)]
        self.__best_arm = argmax_or_min_tuple(candidates)
def update(self, feedback: Feedback):
    """Absorb the round's rewards, then either commit to the best arm
    (when stopping) or keep only the better half of the active arms.

    Args:
        feedback: rewards of the arms pulled by the last actions.
    """
    for arm_feedback in feedback.arm_feedbacks:
        self.__active_arms[arm_feedback.arm.id].update(
            np.array(arm_feedback.rewards))
        self.__budget_left -= len(arm_feedback.rewards)

    if self.__stop:
        # Budget exhausted: pick the arm with the highest empirical mean.
        self.__best_arm = argmax_or_min_tuple([
            (arm.em_mean, arm_id)
            for arm_id, arm in self.__active_arms.items()
        ])
        return

    # Remove half of the arms with the worst empirical means; survivors
    # restart with fresh pseudo-arms for the next halving round.
    keep = math.ceil(len(self.__active_arms) / 2)
    ranked = sorted(self.__active_arms.items(),
                    key=lambda entry: entry[1].em_mean,
                    reverse=True)
    self.__active_arms = {arm_id: PseudoArm() for arm_id, _ in ranked[:keep]}
def update(self, feedback: Feedback):
    """Absorb the round's rewards, discard the empirically worst arm,
    and record the winner once a single arm survives.

    Args:
        feedback: rewards of the arms pulled by the last actions.
    """
    for arm_feedback in feedback.arm_feedbacks:
        self.__active_arms[arm_feedback.arm.id].update(
            np.array(arm_feedback.rewards))
        self.__budget_left -= len(arm_feedback.rewards)

    # Eliminate the arm with the smallest empirical mean reward.
    worst_arm_id = argmax_or_min_tuple(
        [(arm.em_mean, arm_id)
         for arm_id, arm in self.__active_arms.items()],
        find_min=True)
    del self.__active_arms[worst_arm_id]

    # After arm_num - 1 eliminations exactly one arm remains.
    if self.__round == self.arm_num - 1:
        (self.__best_arm,) = self.__active_arms.keys()
    self.__round += 1
def best_arm(self) -> int:
    """Return the id of the arm that has been pulled the most times."""
    pull_counts = [(pseudo_arm.total_pulls, arm_id)
                   for arm_id, pseudo_arm in enumerate(self.__pseudo_arms)]
    return argmax_or_min_tuple(pull_counts)
def best_arm(self) -> int:
    """Return the bandit-level id of the locally most-pulled arm.

    The argmax yields an index local to this learner's assigned arms;
    ``__assigned_arms`` maps it back to the bandit's actual arm id.
    """
    local_best = argmax_or_min_tuple([
        (pseudo_arm.total_pulls, arm_id)
        for arm_id, pseudo_arm in enumerate(self.__pseudo_arms)
    ])
    return self.__assigned_arms[local_best]