def test_simple_run(self):
    """Check UCB's action sequence when its index values are mocked.

    The first ``arm_num`` steps are expected to pull arms 0..arm_num-1 once
    each, in order; every remaining step up to ``horizon`` is expected to
    pull arm 0, which holds the largest mocked value.
    """
    arm_num = 5
    horizon = 10
    learner = UCB(arm_num=arm_num)
    learner.reset()

    index_values = np.array([1.2, 1, 1, 1, 1])
    # Swap the name-mangled private ``__UCB`` attribute for a mock so the
    # values it returns are fixed for the whole run.
    # pylint: disable=protected-access
    learner._UCB__UCB = MagicMock(return_value=index_values)

    # Warm-up phase: one pull per arm, in id order.
    for arm_id in range(arm_num):
        actual = learner.actions(Context()).SerializeToString()
        expected = text_format.Parse(
            """ arm_pulls < arm < id: {arm_id} > times: 1 > """.format(
                arm_id=arm_id),
            Actions()).SerializeToString()
        assert actual == expected

        warmup_feedback = text_format.Parse(
            """ arm_feedbacks < arm < id: {arm_id} > rewards: 0 > """.format(
                arm_id=arm_id),
            Feedback())
        learner.update(warmup_feedback)

    # Remaining steps: arm 0 is expected every time.
    for _ in range(horizon - arm_num):
        actual = learner.actions(Context()).SerializeToString()
        expected = text_format.Parse(
            """ arm_pulls < arm < id: 0 > times: 1 > """,
            Actions()).SerializeToString()
        assert actual == expected

        learner.update(
            text_format.Parse(
                """ arm_feedbacks < arm < id: 0 > rewards: 0 > """,
                Feedback()))
def test_simple_run(self):
    """Check that Uniform's pulls follow ``(time - 1) % arm_num``.

    At every step the learner is expected to emit exactly one pull of the
    next arm id in cyclic order. The feedback handed back to the learner is
    for that same arm.
    """
    arm_num = 5
    horizon = 10
    learner = Uniform(arm_num=arm_num)
    learner.reset()

    for time in range(1, horizon + 1):
        arm_id = (time - 1) % arm_num

        actual = learner.actions(Context()).SerializeToString()
        expected = text_format.Parse(
            """ arm_pulls < arm < id: {arm_id} > times: 1 > """.format(
                arm_id=arm_id),
            Actions()).SerializeToString()
        assert actual == expected

        # Report feedback for the arm that was just pulled, not always arm 0,
        # so the simulated interaction is self-consistent.
        learner.update(
            text_format.Parse(
                """ arm_feedbacks < arm < id: {arm_id} > rewards: 0 > """.format(
                    arm_id=arm_id),
                Feedback()))
def feed(self, actions: Actions) -> Feedback:
    """Collect feedback for every arm pull in ``actions``.

    Args:
        actions: message whose ``arm_pulls`` are each passed to
            ``self._take_action``.

    Returns:
        A ``Feedback`` holding, in pull order, only the arm feedbacks whose
        ``rewards`` field is non-empty.
    """
    result = Feedback()
    for pull in actions.arm_pulls:
        outcome = self._take_action(arm_pull=pull)
        if not outcome.rewards:
            # Nothing was observed for this pull; leave it out.
            continue
        result.arm_feedbacks.append(outcome)
    return result
def feed(self, actions: Actions) -> Feedback:
    """Collect feedback for every arm pull that requests at least one pull.

    Args:
        actions: message whose ``arm_pulls`` are examined in order.

    Returns:
        A ``Feedback`` holding the result of ``self._take_action`` for each
        pull whose ``times`` is positive; other pulls are never acted on.
    """
    result = Feedback()
    for pull in actions.arm_pulls:
        if pull.times <= 0:
            # Skip pulls that do not ask for any play of the arm.
            continue
        outcome = self._take_action(arm_pull=pull)
        result.arm_feedbacks.append(outcome)
    return result
def test_simple_run(self):
    """Check that EpsGreedy pulls each arm once, in id order, at the start."""
    means = [0, 0.5, 0.7, 1]
    arms = [BernoulliArm(mean) for mean in means]
    arm_count = len(arms)
    learner = EpsGreedy(arm_num=arm_count)
    learner.reset()

    # Initial steps: step k is expected to pull arm k exactly once.
    for arm_id in range(arm_count):
        actual = learner.actions(Context()).SerializeToString()
        expected = text_format.Parse(
            """ arm_pulls < arm < id: {arm_id} > times: 1 > """.format(
                arm_id=arm_id),
            Actions()).SerializeToString()
        assert actual == expected

        zero_reward = text_format.Parse(
            """ arm_feedbacks < arm < id: {arm_id} > rewards: 0 > """.format(
                arm_id=arm_id),
            Feedback())
        learner.update(zero_reward)
def test_simple_run(self):
    """Drive SH with all-zero rewards until it stops requesting pulls.

    Afterwards ``best_arm`` must be a valid arm id.
    """
    arm_num = 5
    budget = 20
    learner = SH(arm_num=arm_num, budget=budget)
    learner.reset()

    actions = learner.actions(Context())
    # An empty ``arm_pulls`` ends the interaction.
    while actions.arm_pulls:
        feedback = Feedback()
        for pull in actions.arm_pulls:
            entry = feedback.arm_feedbacks.add()
            entry.arm.id = pull.arm.id
            # One zero reward per requested pull.
            zero_rewards = np.zeros(pull.times)
            entry.rewards.extend(list(zero_rewards))
        learner.update(feedback)
        actions = learner.actions(Context())

    valid_arm_ids = list(range(arm_num))
    assert learner.best_arm in valid_arm_ids
def test_simple_run(self):
    """Drive ExpGap with Gaussian rewards until it stops requesting pulls.

    Rewards for arm ``i`` are drawn from a normal distribution with mean
    ``i / arm_num`` and standard deviation 1. Afterwards ``best_arm`` must be
    a valid arm id.
    """
    arm_num = 3
    confidence = 0.95
    learner = ExpGap(arm_num=arm_num, confidence=confidence)
    learner.reset()

    actions = learner.actions(Context())
    # An empty ``arm_pulls`` ends the interaction.
    while actions.arm_pulls:
        feedback = Feedback()
        for pull in actions.arm_pulls:
            entry = feedback.arm_feedbacks.add()
            entry.arm.id = pull.arm.id
            samples = np.random.normal(pull.arm.id / arm_num, 1, pull.times)
            entry.rewards.extend(list(samples))
        learner.update(feedback)
        actions = learner.actions(Context())

    valid_arm_ids = list(range(arm_num))
    assert learner.best_arm in valid_arm_ids
def test_simple_run(self):
    """Check ExploreThenCommit emits exactly one single pull at each step."""
    arm_num = 5
    horizon = 10
    learner = ExploreThenCommit(arm_num=arm_num, T_prime=6)
    learner.reset()

    for _ in range(horizon):
        actions = learner.actions(Context())
        assert len(actions.arm_pulls) == 1

        pull = actions.arm_pulls[0]
        pulled_id = pull.arm.id
        assert pull.times == 1

        # Feed a zero reward back for whichever arm was chosen.
        zero_reward = text_format.Parse(
            """ arm_feedbacks < arm < id: {arm_id} > rewards: 0 > """.format(
                arm_id=pulled_id),
            Feedback())
        learner.update(zero_reward)
def test_simple_run(self):
    """Check LinUCB's choice when its index values are mocked.

    With the mocked values, arm 0 is expected at every step of the horizon.
    """
    horizon = 10
    # Four arms share the feature [1, 0]; the last arm has [0, 1].
    features = [np.array([1, 0]) for _ in range(4)] + [np.array([0, 1])]
    learner = LinUCB(features, 0.1, 1e-3)
    learner.reset()

    index_values = np.array([1.2, 1, 1, 1, 1])
    # Swap the name-mangled private ``__LinUCB`` attribute for a mock so the
    # values it returns are fixed for the whole run.
    # pylint: disable=protected-access
    learner._LinUCB__LinUCB = MagicMock(return_value=index_values)

    # Arm 0 is expected every time (not the most efficient test).
    for _ in range(horizon):
        actual = learner.actions(Context()).SerializeToString()
        expected = text_format.Parse(
            """ arm_pulls < arm < id: 0 > times: 1 > """,
            Actions()).SerializeToString()
        assert actual == expected

        learner.update(
            text_format.Parse(
                """ arm_feedbacks < arm < id: 0 > rewards: 0 > """,
                Feedback()))
def test_actions(self):
    """Check that APT returns one pull per step with an in-range arm id."""
    arm_num = 10
    budget = 15
    apt = APT(arm_num=arm_num, theta=0.5, eps=0)
    apt.reset()

    for _ in range(budget):
        pulls = apt.actions(Context()).arm_pulls
        assert len(pulls) == 1

        chosen = pulls[0].arm.id
        assert 0 <= chosen < arm_num

        # Feed a zero reward back for the chosen arm.
        zero_reward = text_format.Parse(
            """ arm_feedbacks < arm < id: {arm_id} > rewards: 0 > """.format(
                arm_id=chosen),
            Feedback())
        apt.update(zero_reward)
def test_simple_run(self):
    """Drive CentralizedLilUCBHeuristic with Gaussian rewards until it stops.

    All arms are assigned to the learner. Rewards for arm ``i`` are drawn
    from a normal distribution with mean ``i / arm_num`` and standard
    deviation 1. Afterwards ``best_arm`` must be a valid arm id.
    """
    arm_num = 3
    confidence = 0.95
    learner = CentralizedLilUCBHeuristic(arm_num=arm_num,
                                         confidence=confidence,
                                         assigned_arms=np.arange(arm_num))
    learner.reset()

    # This learner's ``actions`` is called without a context argument.
    actions = learner.actions()
    # An empty ``arm_pulls`` ends the interaction.
    while actions.arm_pulls:
        feedback = Feedback()
        for pull in actions.arm_pulls:
            entry = feedback.arm_feedbacks.add()
            entry.arm.id = pull.arm.id
            samples = np.random.normal(pull.arm.id / arm_num, 1, pull.times)
            entry.rewards.extend(list(samples))
        learner.update(feedback)
        actions = learner.actions()

    valid_arm_ids = list(range(arm_num))
    assert learner.best_arm in valid_arm_ids