Code example #1
    def actions(self, context=None) -> Actions:
        del context
        if self.__stage == 'initialization':
            actions = Actions()  # default state is normal

            # One pull for each assigned arm
            for arm_id in self.__assigned_arms:
                arm_pull = actions.arm_pulls.add()
                arm_pull.arm.id = arm_id
                arm_pull.times = 1
            return actions

        # self.__stage == 'main'
        actions = Actions()

        for pseudo_arm in self.__pseudo_arms:
            if pseudo_arm.total_pulls >= (
                    1 + self.__a * (self.__total_pulls - pseudo_arm.total_pulls)):
                return actions

        arm_pull = actions.arm_pulls.add()

        # Map the local arm index to the bandit's arm index
        arm_pull.arm.id = self.__assigned_arms[int(np.argmax(self.__ucb))]
        arm_pull.times = 1

        return actions
Code example #2
File: ucb_test.py, Project: sheelfshah/banditpylib
    def test_simple_run(self):
        arm_num = 5
        horizon = 10
        learner = UCB(arm_num=arm_num)
        learner.reset()
        mock_ucb = np.array([1.2, 1, 1, 1, 1])
        # pylint: disable=protected-access
        learner._UCB__UCB = MagicMock(return_value=mock_ucb)

        # During the initial time steps, each arm is pulled once
        for time in range(1, arm_num + 1):
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
        arm_pulls <
          arm <
            id: {arm_id}
          >
          times: 1
        >
        """.format(arm_id=time - 1), Actions()).SerializeToString()
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """.format(arm_id=time - 1), Feedback()))
        # For the remaining time steps, arm 0 is always the choice
        for _ in range(arm_num + 1, horizon + 1):
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
        arm_pulls <
          arm <
            id: 0
          >
          times: 1
        >
        """, Actions()).SerializeToString()
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: 0
          >
          rewards: 0
        >
        """, Feedback()))
Code example #3
    def test_simple_run(self):
        means = [0, 0.5, 0.7, 1]
        arms = [BernoulliArm(mean) for mean in means]
        learner = EpsGreedy(arm_num=len(arms))
        learner.reset()

        # Pull each arm once during the initial steps
        for time in range(1, len(arms) + 1):
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
        arm_pulls <
          arm <
            id: {arm_id}
          >
          times: 1
        >
        """.format(arm_id=time - 1), Actions()).SerializeToString()
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """.format(arm_id=time - 1), Feedback()))
Code example #4
File: sh.py, Project: sheelfshah/banditpylib
  def actions(self, context: Context) -> Actions:
    del context

    actions = Actions()

    if self.__stop:
      return actions

    if len(self.__active_arms) <= self.__threshold:
      # Uniform sampling
      pulls = np.random.multinomial(self.__budget_left,
                                    np.ones(len(self.__active_arms)) /
                                    len(self.__active_arms),
                                    size=1)[0]
      i = 0
      for arm_id in self.__active_arms:
        arm_pull = actions.arm_pulls.add()
        arm_pull.arm.id = arm_id
        arm_pull.times = pulls[i]
        i = i + 1
      self.__stop = True
    else:
      # Pulls assigned to each arm
      pulls = math.floor(self.budget /
                         (len(self.__active_arms) * self.__total_rounds))
      for arm_id in self.__active_arms:
        arm_pull = actions.arm_pulls.add()
        arm_pull.arm.id = arm_id
        arm_pull.times = pulls

    return actions
Code example #5
File: ucb.py, Project: sheelfshah/banditpylib
    def actions(self, context: Context) -> Actions:
        del context

        actions = Actions()
        arm_pull = actions.arm_pulls.add()

        # Check if last observation is a purchase
        if self.__last_customer_feedback and self.__last_customer_feedback != 0:
            return self.__last_actions
        # When a non-purchase observation happens, a new episode is started and
        # a new assortment to be served is calculated
        self.reward.set_preference_params(self.__UCB())
        # Calculate assortment with the maximum reward using optimistic
        # preference parameters
        if self.use_local_search:
            _, best_assortment = local_search_best_assortment(
                reward=self.reward,
                random_neighbors=self.random_neighbors,
                card_limit=self.card_limit,
                init_assortment=(set(
                    self.__last_actions.arm_pulls[0].arm.set.id)
                                 if self.__last_actions else None))
        else:
            _, best_assortment = search_best_assortment(
                reward=self.reward, card_limit=self.card_limit)

        arm_pull.arm.set.id.extend(list(best_assortment))
        arm_pull.times = 1

        self.__last_actions = actions
        return actions
Code example #6
    def test_simple_run(self):
        arm_num = 5
        horizon = 10
        learner = Uniform(arm_num=arm_num)
        learner.reset()

        for time in range(1, horizon + 1):
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
        arm_pulls <
          arm <
            id: {arm_id}
          >
          times: 1
        >
        """.format(arm_id=(time - 1) % arm_num),
                    Actions()).SerializeToString()
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: 0
          >
          rewards: 0
        >
        """, Feedback()))
Code example #7
File: uniform.py, Project: sheelfshah/banditpylib
    def actions(self, context: Context) -> Actions:
        del context

        actions = Actions()
        arm_pull = actions.arm_pulls.add()
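        # Cycle through the arms in round-robin order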
        arm_pull.arm.id = (self.__time - 1) % self.arm_num
        arm_pull.times = 1
        return actions
Code example #8
    def test_simple_run(self):
        revenues = np.array([0, 0.7, 0.8, 0.9, 1.0])
        horizon = 100
        reward = CvarReward(0.7)
        learner = ThompsonSampling(revenues=revenues,
                                   horizon=horizon,
                                   reward=reward)

        # Test warm start
        learner.reset()
        assert learner.actions(
            Context()).SerializeToString() == text_format.Parse(
                """
      arm_pulls {
        arm {
          set {
            id: 1
          }
        }
        times: 1
      }
      """, Actions()).SerializeToString()

        learner.reset()
        # pylint: disable=protected-access
        learner._ThompsonSampling__within_warm_start = MagicMock(
            return_value=False)
        mock_preference_params = np.array([1, 1, 1, 1, 1])
        learner._ThompsonSampling__correlated_sampling = MagicMock(
            return_value=mock_preference_params)
        assert learner.actions(
            Context()).SerializeToString() == text_format.Parse(
                """
      arm_pulls {
        arm {
          set {
            id: 1
            id: 2
            id: 3
            id: 4
          }
        }
        times: 1
      }
      """, Actions()).SerializeToString()
Code example #9
    def actions(self, context: Context) -> Actions:
        del context

        actions = Actions()
        arm_pull = actions.arm_pulls.add()
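        # Sample an arm id using the Beta prior or the Gaussian prior, depending on __prior_dist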
        arm_pull.arm.id = self.__sample_from_beta_prior(
        ) if self.__prior_dist == 'beta' else self.__sample_from_gaussian_prior(
        )
        arm_pull.times = 1
        return actions
Code example #10
File: apt.py, Project: sheelfshah/banditpylib
    def actions(self, context: Context) -> Actions:
        actions = Actions()
        arm_pull = actions.arm_pulls.add()
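        # Pull each arm once during the first arm_num steps; afterwards pull the
        # arm with the smallest value of __metrics()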

        if self.__time <= self.arm_num:
            arm_pull.arm.id = self.__time - 1
        else:
            arm_pull.arm.id = int(np.argmin(self.__metrics()))

        arm_pull.times = 1
        return actions
Code example #11
  def actions(self, context: Context) -> Actions:
    del context

    actions = Actions()
    arm_pull = actions.arm_pulls.add()

    ucb = self.__LinUCB()
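    # Pull the arm with the largest LinUCB score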
    arm_pull.arm.id = int(np.argmax(ucb, axis=0))

    arm_pull.times = 1
    return actions
Code example #12
    def actions(self, context: Context) -> Actions:
        del context

        actions = Actions()
        arm_pull = actions.arm_pulls.add()
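        # Mix the normalized weights with uniform exploration controlled by gamma,
        # then sample the arm to pull from the resulting distribution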
        self.__probabilities = (1 - self.__gamma) * self.__weights / sum(
            self.__weights) + self.__gamma / self.arm_num
        arm_pull.arm.id = np.random.choice(self.arm_num,
                                           1,
                                           p=self.__probabilities)[0]
        arm_pull.times = 1
        return actions
Code example #13
  def actions(self, context: Context) -> Actions:
    if len(self.__active_arms) == 1:
      return Actions()

    actions: Actions
    if self.__stage == 'main_loop':
      actions = Actions()
      for arm_id in self.__active_arms:
        self.__active_arms[arm_id] = PseudoArm()

      pulls = math.ceil(2 / (self.__eps_r**2) *
                        (math.log(2) - self.__log_delta_r))
      for arm_id in self.__active_arms:
        arm_pull = actions.arm_pulls.add()
        arm_pull.arm.id = arm_id
        arm_pull.times = pulls
    else:
      # self.__stage == 'median_elimination'
      actions = self.__median_elimination()

    return actions
Code example #14
File: lilucb_heur.py, Project: sheelfshah/banditpylib
  def actions(self, context: Context) -> Actions:
    if self.__stage == 'initialization':
      actions = Actions()
      for arm_id in range(self.arm_num):
        arm_pull = actions.arm_pulls.add()
        arm_pull.arm.id = arm_id
        arm_pull.times = 1
      return actions

    # self.__stage == 'main'
    actions = Actions()

    for pseudo_arm in self.__pseudo_arms:
      if pseudo_arm.total_pulls >= (
          1 + self.__a * (self.__total_pulls - pseudo_arm.total_pulls)):
        return actions

    arm_pull = actions.arm_pulls.add()
    arm_pull.arm.id = int(np.argmax(self.__ucb))
    arm_pull.times = 1

    return actions
Code example #15
  def actions(self, context: Context) -> Actions:
    del context

    actions = Actions()
    arm_pull = actions.arm_pulls.add()
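    # Explore the arms round-robin for the first T_prime steps, then commit to
    # the stored best arm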

    if self.__time <= self.__T_prime:
      arm_pull.arm.id = (self.__time - 1) % self.arm_num
    else:
      arm_pull.arm.id = self.__best_arm

    arm_pull.times = 1
    return actions
Code example #16
File: moss.py, Project: sheelfshah/banditpylib
    def actions(self, context: Context) -> Actions:
        del context

        actions = Actions()
        arm_pull = actions.arm_pulls.add()
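        # Pull each arm once during the first arm_num steps; afterwards pull the
        # arm with the largest MOSS index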

        if self.__time <= self.arm_num:
            arm_pull.arm.id = self.__time - 1
        else:
            arm_pull.arm.id = int(np.argmax(self.__MOSS()))

        arm_pull.times = 1
        return actions
Code example #17
  def actions(self, context: Context = None) -> Actions:
    if self.__stage == self.UNASSIGNED:
      raise Exception("%s: I can\'t act in stage unassigned." % self.name)

    if self.__stage == self.CENTRALIZED_LEARNING:
      if self.__round_index > 0:
        raise Exception("Expected centralized learning in round 0. Got %d." %
                        self.__round_index)

      if self.__central_algo.get_total_pulls(
      ) >= self.__num_pulls_per_round[0]:
        # Early stop the centralized algorithm when it uses more than horizon
        # / 2 pulls.
        self.__stage = self.LEARNING
        self.__arm_to_broadcast = np.random.choice(self.__assigned_arms)
        self.__round_index += 1
        return self.actions()

      if len(self.__assigned_arms) == 1:
        self.__stage = self.LEARNING
        self.__arm_to_broadcast = self.__assigned_arms[0]
        self.__round_index += 1
        return self.actions()

      central_algo_actions = self.__central_algo.actions()
      if not central_algo_actions.arm_pulls:
        # Centralized algorithm terminates before using up horizon / 2 pulls
        self.__stage = self.LEARNING
        self.__arm_to_broadcast = self.__central_algo.best_arm
        self.__round_index += 1
        return self.actions()
      return central_algo_actions
    elif self.__stage == self.LEARNING:
      actions = Actions()
      arm_pull = actions.arm_pulls.add()
      arm_pull.arm.id = self.__arm_to_broadcast
      arm_pull.times = self.__num_pulls_per_round[self.__round_index]
      return actions
    elif self.__stage == self.COMMUNICATION:
      actions = Actions()
      actions.state = Actions.WAIT
      return actions
    else:
      # self.__stage == self.TERMINATION
      actions = Actions()
      actions.state = Actions.STOP
      return actions
Code example #18
File: ts_test.py, Project: sheelfshah/banditpylib
 def test_simple_run(self):
   ts_learner = ThompsonSampling(arm_num=4)
   ts_learner.reset()
   # pylint: disable=protected-access
   ts_learner._ThompsonSampling__sample_from_beta_prior = MagicMock(
       return_value=1)
   # always pull arm 1
   assert ts_learner.actions(
       Context()).SerializeToString() == text_format.Parse(
           """
       arm_pulls <
         arm <
           id: 1
         >
         times: 1
       >
       """, Actions()).SerializeToString()
Code example #19
File: ts.py, Project: sheelfshah/banditpylib
    def __warm_start(self) -> Actions:
        """Initial warm start stage

    Returns:
      assortments to serve in the warm start stage
    """
        # Check if last observation is a purchase
        if self.__last_customer_feedback and self.__last_customer_feedback != 0:
            # Continue serving the same assortment
            return self.__last_actions

        actions = Actions()
        arm_pull = actions.arm_pulls.add()
        arm_pull.arm.set.id.append(self.__next_product_in_warm_start)
        arm_pull.times = 1
        self.__next_product_in_warm_start += 1
        return actions
Code example #20
File: ts.py, Project: sheelfshah/banditpylib
    def actions(self, context: Context) -> Actions:
        del context

        actions: Actions

        # Check if still in warm start stage
        if self.__within_warm_start():
            actions = self.__warm_start()
        else:
            actions = Actions()
            arm_pull = actions.arm_pulls.add()

            # Check if last observation is a purchase
            if self.__last_customer_feedback and self.__last_customer_feedback != 0:
                # Continue serving the same assortment
                return self.__last_actions

            # When a non-purchase observation happens, a new episode starts and a new
            # assortment to serve is generated using the updated estimate of the
            # preference parameters.
            # Set preference parameters generated by Thompson sampling
            self.reward.set_preference_params(self.__correlated_sampling())
            # Calculate best assortment using the generated preference parameters
            if self.use_local_search:
                # Initial assortment to start for local search
                if self.__last_actions is not None:
                    init_assortment = set(
                        self.__last_actions.arm_pulls[0].arm.set.id)
                else:
                    init_assortment = None
                _, best_assortment = local_search_best_assortment(
                    reward=self.reward,
                    random_neighbors=self.random_neighbors,
                    card_limit=self.card_limit,
                    init_assortment=init_assortment)
            else:
                _, best_assortment = search_best_assortment(
                    reward=self.reward, card_limit=self.card_limit)

            arm_pull.arm.set.id.extend(list(best_assortment))
            arm_pull.times = 1

            # self.__first_step_after_warm_start = False

        self.__last_actions = actions
        return actions
Code example #21
    def actions(self, context: Context) -> Actions:
        del context

        actions = Actions()
        arm_pull = actions.arm_pulls.add()
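        # Pull each arm once during the first arm_num steps; afterwards either
        # explore a random arm or exploit the arm with the largest empirical mean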

        if self.__time <= self.arm_num:
            arm_pull.arm.id = self.__time - 1
        # With probability eps/t, randomly select an arm to pull
        elif np.random.random() <= self.__eps / self.__time:
            arm_pull.arm.id = np.random.randint(0, self.arm_num)
        else:
            arm_pull.arm.id = int(
                np.argmax(np.array([arm.em_mean
                                    for arm in self.__pseudo_arms])))

        arm_pull.times = 1
        return actions
Code example #22
  def actions(self, context: Context) -> Actions:
    del context

    actions = Actions()
    arm_pull = actions.arm_pulls.add()

    if self.__time <= self.arm_num:
      arm_pull.arm.id = self.__time - 1
    else:
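      # Sample the arm from a softmax over the empirical means with temperature gamma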
      weights = np.array([
          math.exp(self.__pseudo_arms[arm_id].em_mean / self.__gamma)
          for arm_id in range(self.arm_num)
      ])
      arm_pull.arm.id = np.random.choice(
          self.arm_num, 1, p=[weight / sum(weights) for weight in weights])[0]

    arm_pull.times = 1
    return actions
Code example #23
File: uniform.py, Project: sheelfshah/banditpylib
  def actions(self, context: Context) -> Actions:
    del context

    actions = Actions()

    if not self.__stop:
      # Make sure each arm is sampled at least once
      pulls = np.random.multinomial(self.budget - self.arm_num,
                                    np.ones(self.arm_num) / self.arm_num,
                                    size=1)[0]
      for arm_id in range(self.arm_num):
        arm_pull = actions.arm_pulls.add()
        arm_pull.arm.id = arm_id
        arm_pull.times = pulls[arm_id] + 1

      self.__stop = True

    return actions
Code example #24
 def test_one_product(self):
     preference_params = [1.0, 0.0]
     revenues = [0.0, 1.0]
     bandit = MNLBandit(preference_params, revenues)
     bandit.reset()
     # Always observe no purchase (product 1 has preference weight 0)
     assert set(
         bandit.feed(
             text_format.Parse(
                 """
   arm_pulls {
     arm {
       set {
         id: 1
       }
     }
     times: 5
   }
   """, Actions())).arm_feedbacks[0].customer_feedbacks) == {0}
Code example #25
    def actions(self, context: Context) -> Actions:
        del context

        actions = Actions()

        if self.__round < self.arm_num:
            if self.__round < self.arm_num - 1:
                for arm_id in self.__active_arms:
                    arm_pull = actions.arm_pulls.add()
                    arm_pull.arm.id = arm_id
                    arm_pull.times = self.__pulls_per_round[self.__round]
            else:
                # Use up the remaining budget when there are only two arms left
                pulls = [self.__budget_left // 2]
                pulls.append(self.__budget_left - pulls[0])
                for i in range(2):
                    arm_pull = actions.arm_pulls.add()
                    arm_pull.arm.id = list(self.__active_arms.keys())[i]
                    arm_pull.times = pulls[i]
        return actions
Code example #26
 def test_simple_run(self):
     means = [0, 1]
     arms = [BernoulliArm(mean) for mean in means]
     ordinary_bandit = MultiArmedBandit(arms)
     ordinary_bandit.reset()
     # Pull arm 0 100 times
     actions = text_format.Parse(
         """
   arm_pulls {
     arm {
       id: 0
     }
     times: 100
   }
   """, Actions())
     ordinary_bandit.feed(actions)
     assert ordinary_bandit.regret(MaximizeTotalRewards()) == 100
     arm = Arm()
     arm.id = 1
     assert ordinary_bandit.regret(IdentifyBestArm(best_arm=arm)) == 0
Code example #27
    def test_contextual_bandit(self):
        arm_num = 10
        dimension = 10
        contextual_bandit = ContextualBandit(
            RandomContextGenerator(arm_num=arm_num, dimension=dimension))
        contextual_bandit.reset()
        for _ in range(20):
            _ = contextual_bandit.context
            contextual_bandit.feed(
                text_format.Parse(
                    """
      arm_pulls {
        arm {
          id: 1
        }
        times: 1
      }
      """, Actions()))

        assert contextual_bandit.regret(MaximizeTotalRewards()) <= 20
Code example #28
 def test_regret(self):
     preference_params = np.array(
         [1.0, 1.0, 1.0, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5])
     revenues = np.array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
     card_limit = 1
     bandit = MNLBandit(preference_params, revenues, card_limit)
     bandit.reset()
     # Serve the best assortment {1} 3 times
     bandit.feed(
         text_format.Parse(
             """
   arm_pulls {
     arm {
       set {
         id: 1
       }
     }
     times: 3
   }
   """, Actions()))
     assert bandit.regret(MaximizeTotalRewards()) == 0.0
Code example #29
    def test_simple_run(self):
        horizon = 10
        features = [
            np.array([1, 0]),
            np.array([1, 0]),
            np.array([1, 0]),
            np.array([1, 0]),
            np.array([0, 1])
        ]
        learner = LinUCB(features, 0.1, 1e-3)
        learner.reset()
        mock_ucb = np.array([1.2, 1, 1, 1, 1])
        # pylint: disable=protected-access
        learner._LinUCB__LinUCB = MagicMock(return_value=mock_ucb)

        # Arm 0 is always picked
        # (not the most efficient test)
        for _ in range(1, horizon + 1):
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
            arm_pulls <
              arm <
                id: 0
              >
              times: 1
            >
            """, Actions()).SerializeToString()
            learner.update(
                text_format.Parse(
                    """
            arm_feedbacks <
              arm <
                id: 0
              >
              rewards: 0
            >
            """, Feedback()))
Code example #30
 def test_simple_run(self):
     revenues = np.array([0, 0.45, 0.8, 0.9, 1.0])
     reward = MeanReward()
     learner = EpsGreedy(revenues=revenues, reward=reward)
     learner.reset()
     mock_random_assortment = {2, 3, 4}
     # pylint: disable=protected-access
     learner._EpsGreedy__select_ramdom_assort = MagicMock(
         return_value=mock_random_assortment)
     assert learner.actions(
         Context()).SerializeToString() == text_format.Parse(
             """
   arm_pulls {
     arm {
       set {
         id: 2
         id: 3
         id: 4
       }
     }
     times: 1
   }
   """, Actions()).SerializeToString()