예제 #1
0
    def test_simple_run(self):
        """UCB pulls every arm once, then repeatedly exploits the max-UCB arm."""
        arm_num = 5
        horizon = 10
        learner = UCB(arm_num=arm_num)
        learner.reset()
        mock_ucb = np.array([1.2, 1, 1, 1, 1])
        # pylint: disable=protected-access
        learner._UCB__UCB = MagicMock(return_value=mock_ucb)

        action_template = """
        arm_pulls <
          arm <
            id: {arm_id}
          >
          times: 1
        >
        """
        feedback_template = """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """
        # During the initial time steps, each arm is pulled exactly once.
        for arm_id in range(arm_num):
            expected = text_format.Parse(
                action_template.format(arm_id=arm_id), Actions())
            assert learner.actions(Context()).SerializeToString() == \
                expected.SerializeToString()
            learner.update(
                text_format.Parse(
                    feedback_template.format(arm_id=arm_id), Feedback()))
        # For the remaining time steps, arm 0 (largest mocked UCB score)
        # is always the choice.
        for _ in range(horizon - arm_num):
            expected = text_format.Parse(
                action_template.format(arm_id=0), Actions())
            assert learner.actions(Context()).SerializeToString() == \
                expected.SerializeToString()
            learner.update(
                text_format.Parse(
                    feedback_template.format(arm_id=0), Feedback()))
예제 #2
0
    def test_simple_run(self):
        """Uniform proposes arms deterministically in round-robin order.

        At step ``time`` (1-based) the learner must pull arm
        ``(time - 1) % arm_num`` exactly once.
        """
        arm_num = 5
        horizon = 10
        learner = Uniform(arm_num=arm_num)
        learner.reset()

        for time in range(1, horizon + 1):
            arm_id = (time - 1) % arm_num
            assert learner.actions(
                Context()).SerializeToString() == text_format.Parse(
                    """
        arm_pulls <
          arm <
            id: {arm_id}
          >
          times: 1
        >
        """.format(arm_id=arm_id), Actions()).SerializeToString()
            # Report a zero reward for the arm that was actually pulled.
            # (Previously the feedback always named arm 0 regardless of the
            # action, which was inconsistent with the sibling tests where
            # feedback matches the pulled arm.)
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """.format(arm_id=arm_id), Feedback()))
예제 #3
0
 def feed(self, actions: Actions) -> Feedback:
     """Execute every requested pull and collect the per-arm feedback.

     Per-arm results that carry no rewards are dropped from the
     aggregated feedback.
     """
     feedback = Feedback()
     results = (self._take_action(arm_pull=pull) for pull in actions.arm_pulls)
     feedback.arm_feedbacks.extend(r for r in results if r.rewards)
     return feedback
예제 #4
0
 def feed(self, actions: Actions) -> Feedback:
     """Carry out each pull that requests at least one play.

     Pulls with a non-positive ``times`` are ignored; every executed
     pull contributes one entry to the returned feedback.
     """
     feedback = Feedback()
     for pull in actions.arm_pulls:
         if pull.times <= 0:
             continue
         feedback.arm_feedbacks.append(self._take_action(arm_pull=pull))
     return feedback
예제 #5
0
    def test_simple_run(self):
        """EpsGreedy spends its first len(arms) steps pulling each arm once."""
        means = [0, 0.5, 0.7, 1]
        arms = [BernoulliArm(mean) for mean in means]
        learner = EpsGreedy(arm_num=len(arms))
        learner.reset()

        action_template = """
        arm_pulls <
          arm <
            id: {arm_id}
          >
          times: 1
        >
        """
        feedback_template = """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """
        # Pull each arm once during the initial steps.
        for arm_id in range(len(arms)):
            expected = text_format.Parse(
                action_template.format(arm_id=arm_id), Actions())
            assert learner.actions(Context()).SerializeToString() == \
                expected.SerializeToString()
            learner.update(
                text_format.Parse(
                    feedback_template.format(arm_id=arm_id), Feedback()))
예제 #6
0
    def test_simple_run(self):
        """SH consumes its budget and commits to a legal best arm."""
        arm_num = 5
        budget = 20
        learner = SH(arm_num=arm_num, budget=budget)
        learner.reset()

        # Keep answering pull requests with zero rewards until the learner
        # stops asking (empty actions means it has committed).
        actions = learner.actions(Context())
        while actions.arm_pulls:
            feedback = Feedback()
            for pull in actions.arm_pulls:
                entry = feedback.arm_feedbacks.add()
                entry.arm.id = pull.arm.id
                entry.rewards.extend([0.0] * pull.times)
            learner.update(feedback)
            actions = learner.actions(Context())
        assert learner.best_arm in range(arm_num)
예제 #7
0
    def test_simple_run(self):
        """ExpGap terminates on Gaussian feedback and outputs a legal arm."""
        arm_num = 3
        confidence = 0.95
        learner = ExpGap(arm_num=arm_num, confidence=confidence)
        learner.reset()

        # Feed Gaussian rewards (per-arm mean id / arm_num) until the
        # learner stops requesting pulls.
        actions = learner.actions(Context())
        while actions.arm_pulls:
            feedback = Feedback()
            for pull in actions.arm_pulls:
                entry = feedback.arm_feedbacks.add()
                entry.arm.id = pull.arm.id
                samples = np.random.normal(pull.arm.id / arm_num, 1,
                                           pull.times)
                entry.rewards.extend(samples.tolist())
            learner.update(feedback)
            actions = learner.actions(Context())
        assert learner.best_arm in range(arm_num)
    def test_simple_run(self):
        """ExploreThenCommit emits exactly one single-pull action per step."""
        arm_num = 5
        horizon = 10
        learner = ExploreThenCommit(arm_num=arm_num, T_prime=6)
        learner.reset()

        for _ in range(horizon):
            actions = learner.actions(Context())
            # Each round must request exactly one pull of one arm.
            assert len(actions.arm_pulls) == 1
            pull = actions.arm_pulls[0]
            assert pull.times == 1
            learner.update(
                text_format.Parse(
                    """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """.format(arm_id=pull.arm.id), Feedback()))
예제 #9
0
    def test_simple_run(self):
        """With mocked UCB scores, LinUCB selects arm 0 on every step."""
        horizon = 10
        features = [
            np.array([1, 0]),
            np.array([1, 0]),
            np.array([1, 0]),
            np.array([1, 0]),
            np.array([0, 1])
        ]
        learner = LinUCB(features, 0.1, 1e-3)
        learner.reset()
        mock_ucb = np.array([1.2, 1, 1, 1, 1])
        # pylint: disable=protected-access
        learner._LinUCB__LinUCB = MagicMock(return_value=mock_ucb)

        # Arm 0 carries the largest mocked score, so it is picked each round.
        expected_action = text_format.Parse(
            """
            arm_pulls <
              arm <
                id: 0
              >
              times: 1
            >
            """, Actions()).SerializeToString()
        # not the most efficient test
        for _ in range(horizon):
            assert learner.actions(Context()).SerializeToString() == \
                expected_action
            learner.update(
                text_format.Parse(
                    """
            arm_feedbacks <
              arm <
                id: 0
              >
              rewards: 0
            >
            """, Feedback()))
예제 #10
0
  def test_actions(self):
    """Every action proposed by APT is a single pull of an in-range arm."""
    arm_num = 10
    budget = 15
    apt = APT(arm_num=arm_num, theta=0.5, eps=0)
    apt.reset()
    feedback_template = """
        arm_feedbacks <
          arm <
            id: {arm_id}
          >
          rewards: 0
        >
        """
    for _ in range(budget):
      actions = apt.actions(Context())
      # Exactly one pull per round, targeting a valid arm id.
      assert len(actions.arm_pulls) == 1
      arm_id = actions.arm_pulls[0].arm.id
      assert 0 <= arm_id < arm_num
      apt.update(
          text_format.Parse(feedback_template.format(arm_id=arm_id),
                            Feedback()))
    def test_simple_run(self):
        """CentralizedLilUCBHeuristic run to completion picks a legal arm."""
        arm_num = 3
        confidence = 0.95
        learner = CentralizedLilUCBHeuristic(arm_num=arm_num,
                                             confidence=confidence,
                                             assigned_arms=np.arange(arm_num))
        learner.reset()

        # Answer pull requests with Gaussian rewards (per-arm mean
        # id / arm_num) until the learner stops asking for pulls.
        actions = learner.actions()
        while actions.arm_pulls:
            feedback = Feedback()
            for pull in actions.arm_pulls:
                entry = feedback.arm_feedbacks.add()
                entry.arm.id = pull.arm.id
                samples = np.random.normal(pull.arm.id / arm_num, 1,
                                           pull.times)
                entry.rewards.extend(samples.tolist())
            learner.update(feedback)
            actions = learner.actions()

        assert learner.best_arm in range(arm_num)