def reset(self): """Reset the learner .. warning:: This function should be called before the start of the game. """ self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num())] # current time step self.__time = 1
def reset(self): """Reset the learner .. warning:: This function should be called before the start of the game. """ self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num())] self.__best_arm = None self.__last_round = False
def update(self, feedback: Feedback):
    if self.__stage == 'main_loop':
        for arm_feedback in feedback.arm_feedbacks:
            self.__active_arms[arm_feedback.arm.id].update(
                np.array(arm_feedback.rewards))
        # Initialization of median elimination
        self.__stage = 'median_elimination'
        # self.__me_ell = 1
        self.__me_eps_ell = self.__eps_r / 8
        self.__me_log_delta_ell = self.__log_delta_r - math.log(2)
        self.__me_eps_left = self.__eps_r / 2
        self.__me_delta_left = math.exp(self.__log_delta_r)
        self.__me_active_arms = dict()
        for arm_id in self.__active_arms:
            self.__me_active_arms[arm_id] = PseudoArm()
    elif self.__stage == 'median_elimination':
        for arm_feedback in feedback.arm_feedbacks:
            self.__me_active_arms[arm_feedback.arm.id].update(
                np.array(arm_feedback.rewards))
        if len(self.__me_active_arms) > self.__threshold:
            # Eliminate arms whose empirical means fall below the median
            median = np.median(
                np.array([
                    pseudo_arm.em_mean
                    for (arm_id, pseudo_arm) in self.__me_active_arms.items()
                ]))
            for (arm_id, pseudo_arm) in list(self.__me_active_arms.items()):
                if pseudo_arm.em_mean < median:
                    del self.__me_active_arms[arm_id]
            self.__me_eps_left *= 0.75
            self.__me_delta_left *= 0.5
            self.__me_eps_ell *= 0.75
            self.__me_log_delta_ell -= math.log(2)
            # self.__me_ell += 1
        else:
            # Best arm returned by median elimination
            best_arm_by_me = argmax_or_min_tuple([
                (pseudo_arm.em_mean, arm_id)
                for arm_id, pseudo_arm in self.__me_active_arms.items()
            ])
            # Second half of 'main_loop':
            # use the estimated epsilon-best arm to eliminate arms
            for (arm_id, pseudo_arm) in list(self.__active_arms.items()):
                if pseudo_arm.em_mean < self.__active_arms[
                        best_arm_by_me].em_mean - self.__eps_r:
                    del self.__active_arms[arm_id]
            if len(self.__active_arms) == 1:
                self.__best_arm = list(self.__active_arms.keys())[0]
            self.__stage = 'main_loop'
            self.__round += 1
            self.__eps_r /= 2
            self.__log_delta_r = math.log(
                (1 - self.confidence) / 50) - 3 * math.log(self.__round)
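# A minimal standalone sketch (not part of the learner above) of how the
# main-loop schedule it maintains evolves: each round r halves eps_r and sets
# delta_r = (1 - confidence) / (50 * r^3), so the per-arm pull count
# ceil(2 / eps_r^2 * (log 2 - log delta_r)) grows geometrically.
# The confidence value 0.95 is assumed purely for illustration.
import math

confidence = 0.95
eps_r = 0.125
log_delta_r = math.log((1 - confidence) / 50)
for r in range(1, 4):
    pulls = math.ceil(2 / (eps_r**2) * (math.log(2) - log_delta_r))
    print(f'round {r}: eps_r={eps_r}, pulls per arm={pulls}')
    eps_r /= 2
    log_delta_r = math.log((1 - confidence) / 50) - 3 * math.log(r + 1)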
def reset(self):
    self.__active_arms: Dict[int, PseudoArm] = dict()
    for arm_id in range(self.arm_num):
        self.__active_arms[arm_id] = PseudoArm()
    self.__budget_left = self.budget
    self.__best_arm = None
    self.__total_rounds = math.ceil(math.log(self.arm_num, 2))
    # Current round
    # self.__round = 1
    self.__stop = False
def reset(self):
    self.__pseudo_arms = [PseudoArm() for _ in range(self.arm_num)]
    # Parameters suggested by the paper
    self.__beta = 0.5
    self.__a = 1 + 10 / self.arm_num
    self.__eps = 0
    self.__delta = (1 - self.confidence) / 5
    # Total number of pulls used
    self.__total_pulls = 0
    self.__stage = 'initialization'
    self.__ucb = np.array([0.0] * self.arm_num)
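# A minimal sketch (an assumption about the lil'UCB paper's rule, not code
# from this library) of the stopping condition the parameter `a` above feeds
# into: sampling stops once some arm has been pulled at least
# 1 + a * (total pulls of all other arms) times.
def should_stop(pulls_per_arm, a):
    total = sum(pulls_per_arm)
    return any(n >= 1 + a * (total - n) for n in pulls_per_arm)

# e.g. with 10 arms, a = 1 + 10 / 10 = 2:
print(should_stop([50, 3, 2, 2, 1, 1, 1, 1, 1, 1], a=2))  # True: 50 >= 1 + 2 * 13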
def reset(self):
    self.__active_arms: Dict[int, PseudoArm] = dict()
    for arm_id in range(self.arm_num):
        self.__active_arms[arm_id] = PseudoArm()
    self.__best_arm = None
    # Current round index
    self.__round = 1
    self.__stage = 'main_loop'
    # Main loop variables
    self.__eps_r = 0.125
    self.__log_delta_r = math.log((1 - self.confidence) / 50)
def reset(self):
    # Create only as many local arms as num_assigned_arms; the entire
    # algorithm behaves as if there were just num_assigned_arms in the bandit.
    self.__pseudo_arms = [PseudoArm() for _ in self.__assigned_arms]
    # Parameters suggested by the paper
    self.__beta = 0.5
    self.__a = 1 + 10 / len(self.__assigned_arms)
    self.__eps = 0
    self.__delta = (1 - self.confidence) / 5
    # Total number of pulls used
    self.__total_pulls = 0
    self.__stage = 'initialization'
    self.__ucb = np.array([0.0] * len(self.__assigned_arms))
def reset(self): """Reset the learner .. warning:: This function should be called before the start of the game. """ self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num())] self.__active_arms = list(range(self.arm_num())) self.__budget_left = self.budget() self.__best_arm = None self.__total_rounds = math.ceil(math.log(self.arm_num(), 2)) # current round self.__round = 1 self.__last_round = False
def reset(self): """Reset the learner .. warning:: This function should be called before the start of the game. """ self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num())] # parameters suggested by the paper self.__beta = 0.5 self.__a = 1 + 10 / self.arm_num() self.__eps = 0 self.__delta = (1 - self.confidence()) / 5 # total number of pulls used self.__total_pulls = 0 self.__stage = 'initialization'
def update(self, feedback: Feedback):
    for arm_feedback in feedback.arm_feedbacks:
        self.__active_arms[arm_feedback.arm.id].update(
            np.array(arm_feedback.rewards))
        self.__budget_left -= len(arm_feedback.rewards)
    if self.__stop:
        self.__best_arm = argmax_or_min_tuple([
            (arm.em_mean, arm_id)
            for arm_id, arm in self.__active_arms.items()
        ])
    else:
        # Remove the half of the arms with the worst empirical means
        remaining_arms = sorted(
            self.__active_arms.items(),
            key=lambda x: x[1].em_mean,
            reverse=True)[:math.ceil(len(self.__active_arms) / 2)]
        self.__active_arms = dict(
            (arm_id, PseudoArm()) for arm_id, _ in remaining_arms)
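# A toy illustration (hypothetical empirical means, not library code) of the
# halving step above: with 5 active arms, the ceil(5 / 2) = 3 arms with the
# best empirical means survive into the next round with fresh statistics.
import math

em_means = {0: 0.1, 1: 0.7, 2: 0.4, 3: 0.9, 4: 0.3}
survivors = sorted(em_means.items(), key=lambda x: x[1],
                   reverse=True)[:math.ceil(len(em_means) / 2)]
print([arm_id for arm_id, _ in survivors])  # [3, 1, 2]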
def median_elimination(self) -> List[Tuple[int, int]]: """ Returns: arms to pull in median elimination """ self.__me_pseudo_arms = [(arm_id, PseudoArm()) for arm_id in self.__me_active_arms] if len(self.__me_active_arms) <= self.__threshold: # uniform sampling pulls = math.ceil(0.5 / (self.__me_eps_left**2) * (math.log( 2 / self.__me_delta_left / len(self.__me_active_arms)))) else: pulls = math.ceil(4 / (self.__me_eps_ell**2) * (math.log(3) - self.__me_log_delta_ell)) actions = [(arm_id, pulls) for arm_id in self.__me_active_arms] return actions
def reset(self):
    # Calculate pulls assigned to each arm per round
    self.__pulls_per_round = [-1]
    nk = [0]
    for k in range(1, self.arm_num):
        nk.append(
            math.ceil(1 / self.__bar_log_K * (self.budget - self.arm_num) /
                      (self.arm_num + 1 - k)))
        self.__pulls_per_round.append(nk[k] - nk[k - 1])
    self.__active_arms: Dict[int, PseudoArm] = dict()
    for arm_id in range(self.arm_num):
        self.__active_arms[arm_id] = PseudoArm()
    self.__budget_left = self.budget
    self.__best_arm = None
    # Current round
    self.__round = 1
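# A standalone sketch of the schedule computed above, assuming __bar_log_K
# (defined elsewhere in the class) is the usual Successive Rejects normalizer
# bar_log(K) = 1/2 + sum_{i=2}^{K} 1/i; with K = 4 arms and a budget of 100
# pulls, each arm still active in round k receives pulls_per_round[k] pulls.
import math

K, budget = 4, 100
bar_log_K = 0.5 + sum(1 / i for i in range(2, K + 1))
nk = [0]
pulls_per_round = [-1]  # index 0 is unused
for k in range(1, K):
    nk.append(math.ceil(1 / bar_log_K * (budget - K) / (K + 1 - k)))
    pulls_per_round.append(nk[k] - nk[k - 1])
print(pulls_per_round)  # [-1, 16, 5, 10]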
def reset(self): """Reset the learner .. warning:: This function should be called before the start of the game. """ self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num())] # calculate pulls_per_round self.__pulls_per_round = [-1] nk = [0] for k in range(1, self.arm_num()): nk.append( math.ceil(1 / self.__bar_log_K * (self.budget() - self.arm_num()) / (self.arm_num() + 1 - k))) self.__pulls_per_round.append(nk[k] - nk[k - 1]) self.__active_arms = set(range(self.arm_num())) self.__budget_left = self.budget() self.__best_arm = None # current round self.__round = 1
def actions(self, context: Context) -> Actions: if len(self.__active_arms) == 1: return Actions() actions: Actions if self.__stage == 'main_loop': actions = Actions() for arm_id in self.__active_arms: self.__active_arms[arm_id] = PseudoArm() pulls = math.ceil(2 / (self.__eps_r**2) * (math.log(2) - self.__log_delta_r)) for arm_id in self.__active_arms: arm_pull = actions.arm_pulls.add() arm_pull.arm.id = arm_id arm_pull.times = pulls else: # self.__stage == 'median_elimination' actions = self.__median_elimination() return actions
def actions(self, context=None) -> Optional[List[Tuple[int, int]]]: """ Args: context: context of the ordinary bandit which should be `None` Returns: arms to pull """ if len(self.__active_arms) == 1: self.__last_actions = None elif self.__stage == 'main_loop': self.__pseudo_arms = [(arm_id, PseudoArm()) for arm_id in self.__active_arms] pulls = math.ceil(2 / (self.__eps_r**2) * (math.log(2) - self.__log_delta_r)) self.__last_actions = [(arm_id, pulls) for arm_id in self.__active_arms] else: # self.__stage == 'median_elimination' self.__last_actions = self.median_elimination() return self.__last_actions
def __median_elimination(self) -> Actions: """ Returns: arms to pull in median elimination """ actions = Actions() for arm_id in self.__me_active_arms: self.__me_active_arms[arm_id] = PseudoArm() if len(self.__me_active_arms) <= self.__threshold: # Uniform sampling pulls = math.ceil( 0.5 / (self.__me_eps_left**2) * (math.log(2 / self.__me_delta_left / len(self.__me_active_arms)))) else: pulls = math.ceil(4 / (self.__me_eps_ell**2) * (math.log(3) - self.__me_log_delta_ell)) for arm_id in self.__me_active_arms: arm_pull = actions.arm_pulls.add() arm_pull.arm.id = arm_id arm_pull.times = pulls return actions
def reset(self):
    self.__pseudo_arms = [PseudoArm() for _ in range(self.arm_num)]
    self.__best_arm = None
    self.__stop = False
def reset(self):
    self.__pseudo_arms = [PseudoArm() for _ in range(self.arm_num)]
    # Current time step
    self.__time = 1
def reset(self):
    self.__pseudo_arms = [PseudoArm() for _ in range(self.arm_num)]