コード例 #1
0
  def update(self, feedback: Feedback):
    """Consume one batch of feedback and advance the two-stage state machine.

    Stage ``'main_loop'``: rewards are folded into the active arms, then a
    fresh median-elimination run is set up (its own epsilon/delta budget and
    one new ``PseudoArm`` per active arm) and the stage switches to
    ``'median_elimination'``.

    Stage ``'median_elimination'``: rewards are folded into the
    median-elimination arms. While more than ``threshold`` of them remain,
    the arms whose empirical mean lies strictly below the median are dropped
    and the budget is tightened. Otherwise the run is over: the winner is
    used to prune the active arms, the best arm is recorded if exactly one
    active arm is left, and the next round of the main loop is prepared.

    Any other stage value leaves the learner untouched.

    Args:
      feedback: object carrying ``arm_feedbacks``; every entry exposes
        ``arm.id`` and ``rewards``.
    """
    if self.__stage == 'main_loop':
      for entry in feedback.arm_feedbacks:
        target = self.__active_arms[entry.arm.id]
        target.update(np.array(entry.rewards))

      # Set up a new median-elimination run. The run's round index is not
      # tracked; only the per-round epsilon / log-delta values are.
      self.__stage = 'median_elimination'
      self.__me_eps_ell = self.__eps_r / 8
      self.__me_log_delta_ell = self.__log_delta_r - math.log(2)
      self.__me_eps_left = self.__eps_r / 2
      self.__me_delta_left = math.exp(self.__log_delta_r)
      self.__me_active_arms = {
          arm_id: PseudoArm() for arm_id in self.__active_arms
      }
      return

    if self.__stage != 'median_elimination':
      return

    for entry in feedback.arm_feedbacks:
      target = self.__me_active_arms[entry.arm.id]
      target.update(np.array(entry.rewards))

    if len(self.__me_active_arms) > self.__threshold:
      # Still too many candidates: cut everything strictly below the median.
      mean_values = [arm.em_mean for arm in self.__me_active_arms.values()]
      cutoff = np.median(np.array(mean_values))
      below_cutoff = [
          arm_id for arm_id, arm in self.__me_active_arms.items()
          if arm.em_mean < cutoff
      ]
      for arm_id in below_cutoff:
        del self.__me_active_arms[arm_id]

      self.__me_eps_left *= 0.75
      self.__me_delta_left *= 0.5
      self.__me_eps_ell *= 0.75
      self.__me_log_delta_ell -= math.log(2)
      return

    # Median elimination is finished; pick its winner.
    # NOTE(review): argmax_or_min_tuple presumably returns the id paired with
    # the largest empirical mean -- confirm against its definition.
    scored = [(arm.em_mean, arm_id)
              for arm_id, arm in self.__me_active_arms.items()]
    leader = argmax_or_min_tuple(scored)

    # Second half of the main loop: drop every active arm whose empirical
    # mean trails the winner's by more than the current epsilon. The
    # winner's mean is re-read on each iteration, as the dict shrinks.
    for arm_id, arm in tuple(self.__active_arms.items()):
      if arm.em_mean < self.__active_arms[leader].em_mean - self.__eps_r:
        del self.__active_arms[arm_id]

    if len(self.__active_arms) == 1:
      (self.__best_arm,) = self.__active_arms

    # Prepare the next round.
    self.__stage = 'main_loop'
    self.__round += 1
    self.__eps_r /= 2
    confidence_term = math.log((1 - self.confidence) / 50)
    round_term = 3 * math.log(self.__round)
    self.__log_delta_r = confidence_term - round_term
コード例 #2
0
 def update(self, feedback: Feedback):
   """Record the first arm's rewards and commit once exploration is over.

   Only ``feedback.arm_feedbacks[0]`` is read. The internal clock advances
   by one per call. When no best arm has been chosen yet (``best_arm`` is
   negative) and the clock has passed ``T_prime``, the best arm is selected
   from the empirical means of arms ``0 .. arm_num - 1``.

   Args:
     feedback: object whose ``arm_feedbacks[0]`` exposes ``arm.id`` and
       ``rewards``.
   """
   head = feedback.arm_feedbacks[0]
   pulled_arm = self.__pseudo_arms[head.arm.id]
   pulled_arm.update(np.array(head.rewards))
   self.__time += 1

   nothing_to_commit = not (self.__best_arm < 0 and
                            self.__time > self.__T_prime)
   if nothing_to_commit:
     return

   mean_and_id = []
   for arm_id in range(self.arm_num):
     mean_and_id.append((self.__pseudo_arms[arm_id].em_mean, arm_id))
   # NOTE(review): argmax_or_min_tuple presumably returns the id paired with
   # the largest empirical mean -- confirm against its definition.
   self.__best_arm = argmax_or_min_tuple(mean_and_id)
コード例 #3
0
ファイル: sh.py プロジェクト: sheelfshah/banditpylib
 def update(self, feedback: Feedback):
   """Fold in the feedback, then either halve the arm set or pick a winner.

   Every entry's rewards are added to the matching active arm and their
   count is deducted from the remaining budget. Afterwards:

   * if ``stop`` is falsy, the top ``ceil(len / 2)`` arms by empirical mean
     are kept (ties retain their existing dict order) and each survivor
     restarts with a fresh ``PseudoArm``;
   * otherwise the best arm is chosen among the active arms.

   Args:
     feedback: object carrying ``arm_feedbacks``; every entry exposes
       ``arm.id`` and ``rewards``.
   """
   for entry in feedback.arm_feedbacks:
     arm = self.__active_arms[entry.arm.id]
     arm.update(np.array(entry.rewards))
     self.__budget_left -= len(entry.rewards)

   if not self.__stop:

     def empirical_mean(item):
       return item[1].em_mean

     # Highest empirical mean first; the worse half is discarded.
     ranked = sorted(self.__active_arms.items(),
                     key=empirical_mean,
                     reverse=True)
     keep = math.ceil(len(self.__active_arms) / 2)
     self.__active_arms = {arm_id: PseudoArm() for arm_id, _ in ranked[:keep]}
     return

   # NOTE(review): argmax_or_min_tuple presumably returns the id paired with
   # the largest empirical mean -- confirm against its definition.
   mean_and_id = [
       (arm.em_mean, arm_id) for arm_id, arm in self.__active_arms.items()
   ]
   self.__best_arm = argmax_or_min_tuple(mean_and_id)
コード例 #4
0
    def update(self, feedback: Feedback):
        """Fold in the feedback and reject the currently worst active arm.

        Every entry's rewards are added to the matching active arm and their
        count is deducted from the remaining budget. The arm selected by
        ``argmax_or_min_tuple(..., find_min=True)`` over the empirical means
        is then removed. When the current round equals ``arm_num - 1`` the
        first remaining arm id is recorded as the best arm. The round
        counter is incremented last.

        Args:
            feedback: object carrying ``arm_feedbacks``; every entry exposes
                ``arm.id`` and ``rewards``.
        """
        for entry in feedback.arm_feedbacks:
            arm = self.__active_arms[entry.arm.id]
            arm.update(np.array(entry.rewards))
            self.__budget_left -= len(entry.rewards)

        # Drop the arm with the smallest empirical mean.
        mean_and_id = []
        for arm_id, arm in self.__active_arms.items():
            mean_and_id.append((arm.em_mean, arm_id))
        weakest = argmax_or_min_tuple(mean_and_id, find_min=True)
        self.__active_arms.pop(weakest)

        last_round = self.arm_num - 1
        if self.__round == last_round:
            self.__best_arm = list(self.__active_arms)[0]
        self.__round += 1
コード例 #5
0
ファイル: lilucb_heur.py プロジェクト: sheelfshah/banditpylib
 def best_arm(self) -> int:
   """Return the arm picked by ``argmax_or_min_tuple`` over pull counts.

   Each pseudo arm contributes a ``(total_pulls, index)`` pair, where the
   index is its position in the pseudo-arm sequence.
   """
   pull_counts = []
   for index, arm in enumerate(self.__pseudo_arms):
     pull_counts.append((arm.total_pulls, index))
   # NOTE(review): presumably this selects the index with the most pulls --
   # confirm against the definition of argmax_or_min_tuple.
   return argmax_or_min_tuple(pull_counts)
コード例 #6
0
 def best_arm(self) -> int:
     """Return the bandit-level id of the arm picked over pull counts.

     Each pseudo arm contributes a ``(total_pulls, local_index)`` pair; the
     local index chosen by ``argmax_or_min_tuple`` is then translated through
     ``assigned_arms`` into the actual bandit index.
     """
     local_to_global = self.__assigned_arms
     pull_counts = [(arm.total_pulls, local_index)
                    for local_index, arm in enumerate(self.__pseudo_arms)]
     # NOTE(review): presumably this selects the local index with the most
     # pulls -- confirm against the definition of argmax_or_min_tuple.
     local_winner = argmax_or_min_tuple(pull_counts)
     return local_to_global[local_winner]