Example #1
    def test_simple_run(self):
        arm_num = 5
        horizon = 10
        learner = UCB(arm_num=arm_num)
        learner.reset()
        mock_ucb = np.array([1.2, 1, 1, 1, 1])
        # pylint: disable=protected-access
        learner._UCB__UCB = MagicMock(return_value=mock_ucb)
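        # Arm 0 gets the largest mocked UCB score, so it should be preferred
        # once every arm has been pulled at least once.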

        # During the initial time steps, each arm is pulled once
        for time in range(1, arm_num + 1):
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
        arm_pulls <
          arm <
            id: {arm_id}
          >
          times: 1
        >
        """.format(arm_id=time - 1), Actions()).SerializeToString()
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """.format(arm_id=time - 1), Feedback()))
        # For the remaining time steps, arm 0 is always chosen
        for _ in range(arm_num + 1, horizon + 1):
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
        arm_pulls <
          arm <
            id: 0
          >
          times: 1
        >
        """, Actions()).SerializeToString()
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: 0
          >
          rewards: 0
        >
        """, Feedback()))
Example #2
    def test_simple_run(self):
        arm_num = 5
        horizon = 10
        learner = Uniform(arm_num=arm_num)
        learner.reset()

        for time in range(1, horizon + 1):
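            # The Uniform learner cycles through the arms in round-robin order,
            # so step `time` should pull arm (time - 1) % arm_num exactly once.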
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
        arm_pulls <
          arm <
            id: {arm_id}
          >
          times: 1
        >
        """.format(arm_id=(time - 1) % arm_num),
                    Actions()).SerializeToString()
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: 0
          >
          rewards: 0
        >
        """, Feedback()))
Example #3
    def test_simple_run(self):
        means = [0, 0.5, 0.7, 1]
        arms = [BernoulliArm(mean) for mean in means]
        learner = EpsGreedy(arm_num=len(arms))
        learner.reset()

        # Pull each arm once during the initial steps
        for time in range(1, len(arms) + 1):
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
        arm_pulls <
          arm <
            id: {arm_id}
          >
          times: 1
        >
        """.format(arm_id=time - 1), Actions()).SerializeToString()
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """.format(arm_id=time - 1), Feedback()))
Example #4
    def test_simple_run(self):
        revenues = np.array([0, 0.7, 0.8, 0.9, 1.0])
        horizon = 100
        reward = CvarReward(0.7)
        learner = ThompsonSampling(revenues=revenues,
                                   horizon=horizon,
                                   reward=reward)

        # Test warm start
        learner.reset()
        assert learner.actions(
            Context()).SerializeToString() == text_format.Parse(
                """
      arm_pulls {
        arm {
          set {
            id: 1
          }
        }
        times: 1
      }
      """, Actions()).SerializeToString()

        learner.reset()
        # pylint: disable=protected-access
        learner._ThompsonSampling__within_warm_start = MagicMock(
            return_value=False)
        mock_preference_params = np.array([1, 1, 1, 1, 1])
        learner._ThompsonSampling__correlated_sampling = MagicMock(
            return_value=mock_preference_params)
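        # With the warm start bypassed and all preference parameters mocked to 1,
        # the learner is expected to offer the assortment {1, 2, 3, 4}.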
        assert learner.actions(
            Context()).SerializeToString() == text_format.Parse(
                """
      arm_pulls {
        arm {
          set {
            id: 1
            id: 2
            id: 3
            id: 4
          }
        }
        times: 1
      }
      """, Actions()).SerializeToString()
Example #5
 def test_simple_run(self):
   ts_learner = ThompsonSampling(arm_num=4)
   ts_learner.reset()
   # pylint: disable=protected-access
   ts_learner._ThompsonSampling__sample_from_beta_prior = MagicMock(
       return_value=1)
   # Arm 1 should always be pulled
   assert ts_learner.actions(
       Context()).SerializeToString() == text_format.Parse(
           """
       arm_pulls <
         arm <
           id: 1
         >
         times: 1
       >
       """, Actions()).SerializeToString()
Example #6
    def test_simple_run(self):
        arm_num = 5
        budget = 20
        learner = SH(arm_num=arm_num, budget=budget)
        learner.reset()

        while True:
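            # Keep requesting arm pulls until the learner returns an empty
            # action set, answering every pull with zero rewards.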
            actions = learner.actions(Context())
            if not actions.arm_pulls:
                break

            feedback = Feedback()
            for arm_pull in actions.arm_pulls:
                arm_feedback = feedback.arm_feedbacks.add()
                arm_feedback.arm.id = arm_pull.arm.id
                arm_feedback.rewards.extend(list(np.zeros(arm_pull.times)))
            learner.update(feedback)
        assert learner.best_arm in list(range(arm_num))
Example #7
    def test_simple_run(self):
        arm_num = 3
        confidence = 0.95
        learner = ExpGap(arm_num=arm_num, confidence=confidence)
        learner.reset()

        while True:
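            # Interact until the learner stops proposing pulls; each pull is
            # answered with Gaussian rewards of mean arm_id / arm_num.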
            actions = learner.actions(Context())
            if not actions.arm_pulls:
                break

            feedback = Feedback()
            for arm_pull in actions.arm_pulls:
                arm_feedback = feedback.arm_feedbacks.add()
                arm_feedback.arm.id = arm_pull.arm.id
                arm_feedback.rewards.extend(
                    list(
                        np.random.normal(arm_pull.arm.id / arm_num, 1,
                                         arm_pull.times)))
            learner.update(feedback)
        assert learner.best_arm in list(range(arm_num))
Example #8
    def test_simple_run(self):
        arm_num = 5
        horizon = 10
        learner = ExploreThenCommit(arm_num=arm_num, T_prime=6)
        learner.reset()

        for _ in range(1, horizon + 1):
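            # The learner should propose exactly one single-time pull per step;
            # every pull is answered with a zero reward.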
            actions = learner.actions(Context())
            assert len(actions.arm_pulls) == 1
            arm_pull = actions.arm_pulls[0]
            arm_id = arm_pull.arm.id
            assert arm_pull.times == 1
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """.format(arm_id=arm_id), Feedback()))
Example #9
    def test_simple_run(self):
        horizon = 10
        features = [
            np.array([1, 0]),
            np.array([1, 0]),
            np.array([1, 0]),
            np.array([1, 0]),
            np.array([0, 1])
        ]
        learner = LinUCB(features, 0.1, 1e-3)
        learner.reset()
        mock_ucb = np.array([1.2, 1, 1, 1, 1])
        # pylint: disable=protected-access
        learner._LinUCB__LinUCB = MagicMock(return_value=mock_ucb)

        # Arm 0 is always picked (not the most efficient test)
        for _ in range(1, horizon + 1):
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
            arm_pulls <
              arm <
                id: 0
              >
              times: 1
            >
            """, Actions()).SerializeToString()
            learner.update(
                text_format.Parse(
                    """
            arm_feedbacks <
              arm <
                id: 0
              >
              rewards: 0
            >
            """, Feedback()))
Example #10
  def test_actions(self):
    # Test that the proposed actions fall in the valid range
    arm_num = 10
    budget = 15
    apt = APT(arm_num=arm_num, theta=0.5, eps=0)
    apt.reset()
    for _ in range(budget):
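      # Each step should propose exactly one arm pull with a valid arm id;
      # the pull is answered with a zero reward.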
      actions = apt.actions(Context())
      assert len(actions.arm_pulls) == 1

      arm_id = actions.arm_pulls[0].arm.id
      assert 0 <= arm_id < arm_num

      apt.update(
          text_format.Parse(
              """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """.format(arm_id=arm_id), Feedback()))
Example #11
 def test_simple_run(self):
     revenues = np.array([0, 0.45, 0.8, 0.9, 1.0])
     reward = MeanReward()
     learner = EpsGreedy(revenues=revenues, reward=reward)
     learner.reset()
     mock_random_assortment = {2, 3, 4}
     # pylint: disable=protected-access
     learner._EpsGreedy__select_ramdom_assort = MagicMock(
         return_value=mock_random_assortment)
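     # The random assortment selection is mocked, so the first action is
     # expected to offer exactly the assortment {2, 3, 4}.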
     assert learner.actions(
         Context()).SerializeToString() == text_format.Parse(
             """
   arm_pulls {
     arm {
       set {
         id: 2
         id: 3
         id: 4
       }
     }
     times: 1
   }
   """, Actions()).SerializeToString()
Example #12
    def test_simple_run(self):
        revenues = np.array([0, 0.7, 0.8, 0.9, 1.0])
        reward = CvarReward(0.7)
        learner = UCB(revenues=revenues, reward=reward)

        learner.reset()
        mock_preference_params = np.array([1, 1, 1, 1, 1])
        # pylint: disable=protected-access
        learner._UCB__UCB = MagicMock(return_value=mock_preference_params)
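        # With every UCB estimate mocked to 1, the learner is expected to offer
        # the assortment {1, 2, 3, 4}.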
        assert learner.actions(
            Context()).SerializeToString() == text_format.Parse(
                """
      arm_pulls {
        arm {
          set {
            id: 1
            id: 2
            id: 3
            id: 4
          }
        }
        times: 1
      }
      """, Actions()).SerializeToString()
Example #13
 def context(self) -> Context:
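   # Draw a fresh (context, rewards) pair from the context generator, cache it,
   # and expose the context values through the sequential_context field.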
   self.__context_and_rewards = self.__context_generator.context()
   context = Context()
   context.sequential_context.value.extend(self.__context_and_rewards[0])
   return context
Example #14
 def context(self) -> Context:
     return Context()