def test_update_reward(self):
    """A reward given for a history id is stored on that history record."""
    bandit = ucb1.UCB1(self.actions, self.historystorage, self.modelstorage)
    hist_id, _chosen = bandit.get_action(context=None)
    bandit.reward(hist_id, 1)

    # Read the record back through the policy's own history storage.
    record = bandit._historystorage.get_history(hist_id)
    self.assertEqual(record.reward, 1)
def policy_generation(bandit, actions):
    """Build the bandit policy selected by ``bandit``.

    Parameters
    ----------
    bandit : str
        Name of the algorithm: 'Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1',
        'Exp3' or 'random'.
    actions
        Actions passed to the policy constructor as its first argument.

    Returns
    -------
    policy
        The constructed policy object, or ``0`` when ``bandit`` is
        'random'.

    Raises
    ------
    ValueError
        If ``bandit`` is not one of the supported names.
    """
    supported = ('Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1', 'Exp3', 'random')
    # Fail early with a clear message: an unknown name used to fall through
    # every branch and crash on ``return policy`` with UnboundLocalError.
    if bandit not in supported:
        raise ValueError(
            "Unknown bandit algorithm {!r}; expected one of {}".format(
                bandit, ', '.join(supported)))

    # 'random' yields the placeholder value 0 and needs no storage objects.
    if bandit == 'random':
        return 0

    historystorage = history.MemoryHistoryStorage()
    modelstorage = model.MemoryModelStorage()

    if bandit == 'Exp4P':
        return exp4p.Exp4P(actions, historystorage, modelstorage,
                           delta=0.5, pmin=None)
    if bandit == 'LinUCB':
        return linucb.LinUCB(actions, historystorage, modelstorage, 0.3, 20)
    if bandit == 'LinThompSamp':
        return linthompsamp.LinThompSamp(actions, historystorage,
                                         modelstorage, d=20, delta=0.61,
                                         r=0.01, epsilon=0.71)
    if bandit == 'UCB1':
        return ucb1.UCB1(actions, historystorage, modelstorage)
    # Only 'Exp3' is left after the validation above.
    return exp3.Exp3(actions, historystorage, modelstorage, gamma=0.2)
def test_model_storage(self):
    """Check the stored model counters after one action rewarded with 1."""
    bandit = ucb1.UCB1(self.actions, self.historystorage, self.modelstorage)
    hist_id, chosen = bandit.get_action(context=None)
    bandit.reward(hist_id, 1)

    # NOTE(review): the expected values below presumably rely on how UCB1
    # initialises its counters and on the size of self.actions -- confirm
    # against the fixture and the UCB1 implementation.
    stored = bandit._modelstorage._model
    self.assertEqual(stored['empirical_reward'][chosen], 2)
    self.assertEqual(stored['n_actions'][chosen], 2.0)
    self.assertEqual(stored['n_total'], 6.0)
def test_reward_order_descending(self):
    """Rewarding the second of two pending histories leaves the first unset."""
    bandit = ucb1.UCB1(self.actions, self.historystorage, self.modelstorage)
    first_id, _first_action = bandit.get_action(context=None)
    second_id, _second_action = bandit.get_action(context=None)

    # Only the later history receives a reward.
    bandit.reward(second_id, 1)

    storage = bandit._historystorage
    self.assertEqual(storage.get_history(first_id).reward, None)
    self.assertEqual(storage.get_history(second_id).reward, 1.0)
def policy_generation(bandit, actions):
    """Create the bandit policy identified by ``bandit``.

    Parameters
    ----------
    bandit : str
        Bandit algorithm name: 'Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1',
        'Exp3' or 'random'.
    actions
        The actions, i.e. the recommended movies; passed to the policy
        constructor as its action storage.

    Returns
    -------
    policy
        The generated policy, or ``0`` when ``bandit`` is 'random'.

    Raises
    ------
    ValueError
        If ``bandit`` is not one of the supported names.
    """
    supported = ('Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1', 'Exp3', 'random')
    # Reject unknown names up front: previously they fell through every
    # branch and crashed on ``return policy`` with UnboundLocalError.
    if bandit not in supported:
        raise ValueError(
            "Unknown bandit algorithm {!r}; expected one of {}".format(
                bandit, ', '.join(supported)))

    # 'random' yields the placeholder value 0 and needs no storage objects.
    if bandit == 'random':
        return 0

    historystorage = history.MemoryHistoryStorage()  # in-memory history storage
    modelstorage = model.MemoryModelStorage()  # in-memory model storage, for uniformity

    if bandit == 'Exp4P':
        return exp4p.Exp4P(historystorage, modelstorage, actions,
                           delta=0.5, p_min=None)
    if bandit == 'LinUCB':
        return linucb.LinUCB(history_storage=historystorage,
                             model_storage=modelstorage,
                             action_storage=actions,
                             alpha=0.3,
                             context_dimension=18)
    if bandit == 'LinThompSamp':
        return linthompsamp.LinThompSamp(
            historystorage, modelstorage, actions,
            # Formerly passed as d=20; this argument is the context dimension.
            context_dimension=18,
            delta=0.61, R=0.01, epsilon=0.71)
    if bandit == 'UCB1':
        return ucb1.UCB1(historystorage, modelstorage, actions)
    # Only 'Exp3' is left after the validation above.
    return exp3.Exp3(historystorage, modelstorage, actions, gamma=0.2)
def policy_evaluation(self, policy, desired_action):
    """Run a UCB1 policy for ``self.t`` rounds and count matching actions.

    Parameters
    ----------
    policy : str
        Algorithm name; only 'UCB1' is evaluated.
    desired_action
        Indexable by round; ``desired_action[t][0]`` is compared with the
        action chosen in round ``t``.

    Returns
    -------
    int or None
        ``self.t`` minus the number of rounds whose chosen action differed
        from the desired one, or ``None`` (after printing a notice) when
        ``policy`` is not 'UCB1'.
    """
    if policy != 'UCB1':
        print("We don't support other bandit algorithms now!")
        return None

    history_store = history.MemoryHistoryStorage()
    model_store = model.MemoryModelStorage()
    mistakes = 0
    learner = ucb1.UCB1(self.actions, history_store, model_store)

    for step in range(self.t):
        hist_id, chosen = learner.get_action(context=None)
        missed = desired_action[step][0] != chosen
        # Reward 1 when the chosen action matches the desired one, else 0.
        learner.reward(hist_id, 0 if missed else 1)
        if missed:
            mistakes += 1

    return self.t - mistakes
def policy_generation(bandit, actions):
    """Create the bandit policy identified by ``bandit``.

    Parameters
    ----------
    bandit : str
        Bandit algorithm name: 'Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1',
        'Exp3' or 'random'.
    actions
        Passed to the policy constructor as its action storage.

    Returns
    -------
    policy
        The constructed policy, or ``0`` when ``bandit`` is 'random'.

    Raises
    ------
    ValueError
        If ``bandit`` is not one of the supported names.
    """
    supported = ('Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1', 'Exp3', 'random')
    # Reject unknown names up front: previously they fell through every
    # branch and crashed on ``return policy`` with UnboundLocalError.
    if bandit not in supported:
        raise ValueError(
            "Unknown bandit algorithm {!r}; expected one of {}".format(
                bandit, ', '.join(supported)))

    # 'random' yields the placeholder value 0 and needs no storage objects.
    if bandit == 'random':
        return 0

    historystorage = history.MemoryHistoryStorage()
    modelstorage = model.MemoryModelStorage()

    if bandit == 'Exp4P':
        return exp4p.Exp4P(historystorage, modelstorage, actions,
                           delta=0.5, p_min=None)
    if bandit == 'LinUCB':
        return linucb.LinUCB(history_storage=historystorage,
                             model_storage=modelstorage,
                             action_storage=actions,
                             alpha=0.3,
                             context_dimension=18)
    if bandit == 'LinThompSamp':
        return linthompsamp.LinThompSamp(
            historystorage, modelstorage, actions,
            # Formerly passed as d=20; this argument is the context dimension.
            context_dimension=18,
            delta=0.61, R=0.01, epsilon=0.71)
    if bandit == 'UCB1':
        return ucb1.UCB1(historystorage, modelstorage, actions)
    # Only 'Exp3' is left after the validation above.
    return exp3.Exp3(historystorage, modelstorage, actions, gamma=0.2)
def test_get_first_action(self):
    """The first request gets history id 0 and an action from the fixture."""
    bandit = ucb1.UCB1(self.actions, self.historystorage, self.modelstorage)
    first_id, chosen = bandit.get_action(context=None)

    self.assertEqual(first_id, 0)
    self.assertIn(chosen, self.actions)
def test_initialization(self):
    """A new UCB1 policy keeps the actions it was constructed with."""
    bandit = ucb1.UCB1(self.actions, self.historystorage, self.modelstorage)
    expected_actions = self.actions
    self.assertEqual(expected_actions, bandit._actions)