def test_simple_run(self):
    """Run UCB for `horizon` rounds with its index computation mocked.

    The first `arm_num` rounds are expected to pull arms 0..arm_num-1 in
    order; every later round is expected to pull arm 0, which has the
    largest value in the mocked index vector.
    """
    arm_num = 5
    horizon = 10

    def expected_pull(arm_id):
        # Serialized `Actions` holding one pull of the given arm.
        text = """ arm_pulls < arm < id: {arm_id} > times: 1 > """.format(
            arm_id=arm_id)
        return text_format.Parse(text, Actions()).SerializeToString()

    def zero_reward(arm_id):
        # `Feedback` holding a single reward of 0 for the given arm.
        text = """ arm_feedbacks < arm < id: {arm_id} > rewards: 0 > """.format(
            arm_id=arm_id)
        return text_format.Parse(text, Feedback())

    learner = UCB(arm_num=arm_num)
    learner.reset()
    mock_ucb = np.array([1.2, 1, 1, 1, 1])
    # pylint: disable=protected-access
    learner._UCB__UCB = MagicMock(return_value=mock_ucb)

    # During the initial time steps, each arm is pulled once
    for arm_id in range(arm_num):
        chosen = learner.actions(Context()).SerializeToString()
        assert chosen == expected_pull(arm_id)
        learner.update(zero_reward(arm_id))

    # For the remaining time steps, arm 0 is always the choice
    for _ in range(horizon - arm_num):
        chosen = learner.actions(Context()).SerializeToString()
        assert chosen == expected_pull(0)
        learner.update(zero_reward(0))
def test_simple_run(self):
    """Check that Uniform cycles through the arms in round-robin order.

    At time step `time` (1-based) the learner is expected to pull arm
    `(time - 1) % arm_num` exactly once. The feedback handed back to the
    learner reports a reward of 0 for the arm that was just pulled, so the
    feedback stays consistent with the action taken.
    """
    arm_num = 5
    horizon = 10
    learner = Uniform(arm_num=arm_num)
    learner.reset()

    for time in range(1, horizon + 1):
        arm_id = (time - 1) % arm_num
        expected_actions = text_format.Parse(
            """ arm_pulls < arm < id: {arm_id} > times: 1 > """.format(
                arm_id=arm_id), Actions())
        assert learner.actions(Context()).SerializeToString() == \
            expected_actions.SerializeToString()
        # Report feedback for the arm actually pulled rather than always
        # arm 0.
        learner.update(
            text_format.Parse(
                """ arm_feedbacks < arm < id: {arm_id} > rewards: 0 > """.format(
                    arm_id=arm_id), Feedback()))
def test_simple_run(self):
    """Check that EpsGreedy pulls every arm once, in order, after a reset.

    Only the initial rounds (one per arm) are exercised; each round feeds
    back a reward of 0 for the arm that was pulled.
    """
    means = [0, 0.5, 0.7, 1]
    arms = list(map(BernoulliArm, means))
    learner = EpsGreedy(arm_num=len(arms))
    learner.reset()

    # Pull each arm once during the initial steps
    for arm_id, _ in enumerate(arms):
        actual = learner.actions(Context()).SerializeToString()
        expected = text_format.Parse(
            """ arm_pulls < arm < id: {arm_id} > times: 1 > """.format(
                arm_id=arm_id), Actions()).SerializeToString()
        assert actual == expected

        feedback = text_format.Parse(
            """ arm_feedbacks < arm < id: {arm_id} > rewards: 0 > """.format(
                arm_id=arm_id), Feedback())
        learner.update(feedback)
def test_simple_run(self):
    """Check ThompsonSampling's first action in and out of warm start.

    Right after a reset the learner is expected to offer the set {1}.
    With the warm-start check mocked to return False and the sampled
    preference parameters mocked to all ones, it is expected to offer the
    set {1, 2, 3, 4}.
    """
    revenues = np.array([0, 0.7, 0.8, 0.9, 1.0])
    horizon = 100
    reward = CvarReward(0.7)
    learner = ThompsonSampling(revenues=revenues,
                               horizon=horizon,
                               reward=reward)

    # Test warm start
    learner.reset()
    warm_start_actions = learner.actions(Context())
    expected_warm_start = text_format.Parse(
        """ arm_pulls { arm { set { id: 1 } } times: 1 } """, Actions())
    assert warm_start_actions.SerializeToString() == \
        expected_warm_start.SerializeToString()

    # Test the action once warm start is bypassed
    learner.reset()
    mock_preference_params = np.array([1, 1, 1, 1, 1])
    # pylint: disable=protected-access
    learner._ThompsonSampling__within_warm_start = MagicMock(
        return_value=False)
    learner._ThompsonSampling__correlated_sampling = MagicMock(
        return_value=mock_preference_params)
    sampled_actions = learner.actions(Context())
    expected_sampled = text_format.Parse(
        """ arm_pulls { arm { set { id: 1 id: 2 id: 3 id: 4 } } times: 1 } """,
        Actions())
    assert sampled_actions.SerializeToString() == \
        expected_sampled.SerializeToString()
def test_simple_run(self):
    """Check ThompsonSampling's action when prior sampling is mocked.

    The private beta-prior sampler is replaced by a mock returning 1, and
    the learner is then expected to pull arm 1 once.
    """
    ts_learner = ThompsonSampling(arm_num=4)
    ts_learner.reset()

    # always pull arm 1
    always_one = MagicMock(return_value=1)
    # pylint: disable=protected-access
    ts_learner._ThompsonSampling__sample_from_beta_prior = always_one

    expected = text_format.Parse(
        """ arm_pulls < arm < id: 1 > times: 1 > """, Actions())
    chosen = ts_learner.actions(Context())
    assert chosen.SerializeToString() == expected.SerializeToString()
def test_simple_run(self):
    """Drive SH until it stops requesting pulls, then check its answer.

    Every requested pull is answered with all-zero rewards. Once the
    learner returns no arm pulls, its `best_arm` must be a valid arm id.
    """
    arm_num = 5
    budget = 20
    learner = SH(arm_num=arm_num, budget=budget)
    learner.reset()

    actions = learner.actions(Context())
    # An empty list of arm pulls signals that the learner is done.
    while actions.arm_pulls:
        feedback = Feedback()
        for pull in actions.arm_pulls:
            entry = feedback.arm_feedbacks.add()
            entry.arm.id = pull.arm.id
            zero_rewards = list(np.zeros(pull.times))
            entry.rewards.extend(zero_rewards)
        learner.update(feedback)
        actions = learner.actions(Context())

    valid_arm_ids = list(range(arm_num))
    assert learner.best_arm in valid_arm_ids
def test_simple_run(self):
    """Drive ExpGap until it stops requesting pulls, then check its answer.

    Rewards for arm `i` are drawn from a normal distribution with mean
    `i / arm_num` and standard deviation 1. Once the learner returns no
    arm pulls, its `best_arm` must be a valid arm id.
    """
    arm_num = 3
    confidence = 0.95
    learner = ExpGap(arm_num=arm_num, confidence=confidence)
    learner.reset()

    actions = learner.actions(Context())
    # An empty list of arm pulls signals that the learner is done.
    while actions.arm_pulls:
        feedback = Feedback()
        for pull in actions.arm_pulls:
            entry = feedback.arm_feedbacks.add()
            entry.arm.id = pull.arm.id
            samples = np.random.normal(loc=pull.arm.id / arm_num,
                                       scale=1,
                                       size=pull.times)
            entry.rewards.extend(list(samples))
        learner.update(feedback)
        actions = learner.actions(Context())

    valid_arm_ids = list(range(arm_num))
    assert learner.best_arm in valid_arm_ids
def test_simple_run(self):
    """Check that ExploreThenCommit pulls exactly one arm once per round.

    For each of the `horizon` rounds the learner must return a single arm
    pull with `times == 1`; a reward of 0 for that arm is then fed back.
    """
    arm_num = 5
    horizon = 10
    learner = ExploreThenCommit(arm_num=arm_num, T_prime=6)
    learner.reset()

    feedback_template = """ arm_feedbacks < arm < id: {arm_id} > rewards: 0 > """
    for _ in range(horizon):
        pulls = learner.actions(Context()).arm_pulls
        assert len(pulls) == 1
        only_pull = pulls[0]
        arm_id = only_pull.arm.id
        assert only_pull.times == 1

        feedback = text_format.Parse(
            feedback_template.format(arm_id=arm_id), Feedback())
        learner.update(feedback)
def test_simple_run(self):
    """Run LinUCB for `horizon` rounds with its index computation mocked.

    The mocked index vector is largest at position 0, and the learner is
    expected to pull arm 0 once in every round.
    """
    horizon = 10
    # Four arms share the feature [1, 0]; the last arm has [0, 1].
    features = [np.array([1, 0]) for _ in range(4)]
    features.append(np.array([0, 1]))
    learner = LinUCB(features, 0.1, 1e-3)
    learner.reset()

    mock_ucb = np.array([1.2, 1, 1, 1, 1])
    # pylint: disable=protected-access
    learner._LinUCB__LinUCB = MagicMock(return_value=mock_ucb)

    # Always 0th arm is picked
    # not the most efficient test
    for _ in range(horizon):
        chosen = learner.actions(Context()).SerializeToString()
        expected = text_format.Parse(
            """ arm_pulls < arm < id: 0 > times: 1 > """,
            Actions()).SerializeToString()
        assert chosen == expected

        feedback = text_format.Parse(
            """ arm_feedbacks < arm < id: 0 > rewards: 0 > """, Feedback())
        learner.update(feedback)
def test_actions(self):
    """Check that every APT action is a single pull of a valid arm id.

    For `budget` rounds the learner must return exactly one arm pull whose
    arm id lies in [0, arm_num); a reward of 0 for that arm is fed back.
    """
    # Test actions are in the right range
    arm_num = 10
    budget = 15
    apt = APT(arm_num=arm_num, theta=0.5, eps=0)
    apt.reset()

    feedback_template = """ arm_feedbacks < arm < id: {arm_id} > rewards: 0 > """
    for _ in range(budget):
        pulls = apt.actions(Context()).arm_pulls
        assert len(pulls) == 1
        arm_id = pulls[0].arm.id
        assert 0 <= arm_id < arm_num

        feedback = text_format.Parse(
            feedback_template.format(arm_id=arm_id), Feedback())
        apt.update(feedback)
def test_simple_run(self):
    """Check EpsGreedy's first action when the random assortment is mocked.

    The private random-assortment selector is replaced by a mock returning
    {2, 3, 4}, and the learner is expected to offer exactly that set once.
    """
    revenues = np.array([0, 0.45, 0.8, 0.9, 1.0])
    reward = MeanReward()
    learner = EpsGreedy(revenues=revenues, reward=reward)
    learner.reset()

    mock_random_assortment = {2, 3, 4}
    # The attribute name below must match the library's private method
    # name exactly, so it is kept verbatim.
    # pylint: disable=protected-access
    learner._EpsGreedy__select_ramdom_assort = MagicMock(
        return_value=mock_random_assortment)

    chosen = learner.actions(Context())
    expected = text_format.Parse(
        """ arm_pulls { arm { set { id: 2 id: 3 id: 4 } } times: 1 } """,
        Actions())
    assert chosen.SerializeToString() == expected.SerializeToString()
def test_simple_run(self):
    """Check UCB's first action when its index computation is mocked.

    With the private UCB computation mocked to return all ones, the
    learner is expected to offer the set {1, 2, 3, 4} once.
    """
    revenues = np.array([0, 0.7, 0.8, 0.9, 1.0])
    reward = CvarReward(0.7)
    learner = UCB(revenues=revenues, reward=reward)
    learner.reset()

    mock_preference_params = np.array([1, 1, 1, 1, 1])
    # pylint: disable=protected-access
    learner._UCB__UCB = MagicMock(return_value=mock_preference_params)

    chosen = learner.actions(Context())
    expected = text_format.Parse(
        """ arm_pulls { arm { set { id: 1 id: 2 id: 3 id: 4 } } times: 1 } """,
        Actions())
    assert chosen.SerializeToString() == expected.SerializeToString()
def context(self) -> Context:
    """Fetch a fresh item from the context generator and wrap it.

    The generator's result is stored on the instance, and its first
    element is copied into the sequential context of the returned message.

    Returns:
      a `Context` whose sequential context values come from the first
      element of the generator's result
    """
    generated = self.__context_generator.context()
    # Keep the full result on the instance as well as returning part of it.
    self.__context_and_rewards = generated

    wrapped = Context()
    wrapped.sequential_context.value.extend(generated[0])
    return wrapped
def context(self) -> Context:
    """Return a newly constructed, default `Context` message.

    Returns:
      a `Context` with no fields set
    """
    empty_context = Context()
    return empty_context