def policy_generation(bandit, actions):
    """Build the bandit policy selected by name.

    A new ``history.MemoryHistoryStorage`` and ``model.MemoryModelStorage``
    are created on every call and handed to the policy constructor.

    Parameters
    ----------
    bandit : str
        One of 'Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1', 'Exp3' or
        'random'.
    actions
        Passed unchanged as the first argument of the policy constructor.

    Returns
    -------
    policy
        The constructed policy object, or the placeholder ``0`` when
        ``bandit`` is 'random'.

    Raises
    ------
    ValueError
        If ``bandit`` is not one of the supported names.
    """
    supported = ('Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1', 'Exp3', 'random')
    if bandit not in supported:
        # An unknown name used to fall through every branch and fail on
        # ``return policy`` with an UnboundLocalError; fail early with a
        # message that names the offending value instead.
        raise ValueError(
            'Unknown bandit algorithm {!r}; expected one of {}'.format(
                bandit, ', '.join(supported)))

    historystorage = history.MemoryHistoryStorage()
    modelstorage = model.MemoryModelStorage()

    if bandit == 'Exp4P':
        return exp4p.Exp4P(actions,
                           historystorage,
                           modelstorage,
                           delta=0.5,
                           pmin=None)

    if bandit == 'LinUCB':
        return linucb.LinUCB(actions, historystorage, modelstorage, 0.3, 20)

    if bandit == 'LinThompSamp':
        return linthompsamp.LinThompSamp(actions,
                                         historystorage,
                                         modelstorage,
                                         d=20,
                                         delta=0.61,
                                         r=0.01,
                                         epsilon=0.71)

    if bandit == 'UCB1':
        return ucb1.UCB1(actions, historystorage, modelstorage)

    if bandit == 'Exp3':
        return exp3.Exp3(actions, historystorage, modelstorage, gamma=0.2)

    # Only 'random' is left; it has no policy object, so 0 is the placeholder.
    return 0
Beispiel #2
0
 def test_update_reward(self):
     """A reward given for a history id is readable from that history record."""
     contexts = [[1, 1], [2, 2], [3, 3]]
     policy = linucb.LinUCB(
         self.actions, self.historystorage, self.modelstorage, 1.00, 2)

     history_id, _ = policy.get_action(contexts)
     policy.reward(history_id, 1)

     record = policy._historystorage.get_history(history_id)
     self.assertEqual(record.reward, 1)
Beispiel #3
0
 def test_get_first_action(self):
     """The first get_action call yields history id 0 and a known action."""
     policy = linucb.LinUCB(
         self.actions, self.historystorage, self.modelstorage, 1.00, 2)

     history_id, action = policy.get_action([[1, 1], [2, 2], [3, 3]])

     self.assertEqual(history_id, 0)
     self.assertIn(action, self.actions)
     # Compare against a fresh literal, not the list object handed to the
     # policy, so the stored context is checked by value.
     stored_context = policy._historystorage.get_history(history_id).context
     self.assertEqual(stored_context, [[1, 1], [2, 2], [3, 3]])
Beispiel #4
0
 def test_model_storage(self):
     """After one reward the stored model has the expected 'b'/'matrix_a' sizes."""
     policy = linucb.LinUCB(
         self.actions, self.historystorage, self.modelstorage, 1.00, 2)
     history_id, _ = policy.get_action([[1, 1], [2, 2], [3, 3]])
     policy.reward(history_id, 1)

     # The model is fetched anew for every assertion, one lookup per check.
     b_all = policy._modelstorage.get_model()['b']
     self.assertEqual(len(b_all), 3)

     b_of_action_1 = policy._modelstorage.get_model()['b'][1]
     self.assertEqual(len(b_of_action_1), 2)

     matrix_a_all = policy._modelstorage.get_model()['matrix_a']
     self.assertEqual(len(matrix_a_all), 3)

     matrix_a_of_action_1 = policy._modelstorage.get_model()['matrix_a'][1]
     self.assertEqual(matrix_a_of_action_1.shape, (2, 2))
Beispiel #5
0
def policy_generation(bandit, actions):
    """Build the bandit policy selected by name.

    Parameters
    ----------
    bandit : str
        Name of the bandit algorithm: one of 'Exp4P', 'LinUCB',
        'LinThompSamp', 'UCB1', 'Exp3' or 'random'.
    actions
        The actions, i.e. the movies that can be recommended. Passed
        unchanged to the policy constructor.

    Returns
    -------
    policy
        The generated policy, or the placeholder ``0`` when ``bandit`` is
        'random'.

    Raises
    ------
    ValueError
        If ``bandit`` is not one of the supported names.
    """
    supported = ('Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1', 'Exp3', 'random')
    if bandit not in supported:
        # An unknown name used to fall through every branch and fail on
        # ``return policy`` with an UnboundLocalError; fail early with a
        # message that names the offending value instead.
        raise ValueError(
            'Unknown bandit algorithm {!r}; expected one of {}'.format(
                bandit, ', '.join(supported)))

    historystorage = history.MemoryHistoryStorage()  # in-memory history records
    modelstorage = model.MemoryModelStorage()  # in-memory model storage, kept for a uniform interface

    if bandit == 'Exp4P':
        return exp4p.Exp4P(historystorage,
                           modelstorage,
                           actions,
                           delta=0.5,
                           p_min=None)

    if bandit == 'LinUCB':
        return linucb.LinUCB(history_storage=historystorage,
                             model_storage=modelstorage,
                             action_storage=actions,
                             alpha=0.3,
                             context_dimension=18)

    if bandit == 'LinThompSamp':
        # NOTE: an earlier revision passed d=20 here; the value is now given
        # through the context_dimension keyword.
        return linthompsamp.LinThompSamp(
            historystorage,
            modelstorage,
            actions,
            context_dimension=18,
            delta=0.61,
            R=0.01,
            epsilon=0.71)

    if bandit == 'UCB1':
        return ucb1.UCB1(historystorage, modelstorage, actions)

    if bandit == 'Exp3':
        return exp3.Exp3(historystorage, modelstorage, actions, gamma=0.2)

    # Only 'random' is left; it has no policy object, so 0 is the placeholder.
    return 0
Beispiel #6
0
 def test_add_action(self):
     """Added actions are listed, start with an identity matrix_a, and change once rewarded."""
     policy = linucb.LinUCB(
         self.actions, self.historystorage, self.modelstorage, 1.00, 2)
     first_id, _ = policy.get_action([[1, 1], [2, 2], [3, 3]])

     policy.add_action([4, 5])
     policy.reward(first_id, 1)
     self.assertEqual(policy._actions, [1, 2, 3, 4, 5])

     # Action 4 was added but never rewarded, so its matrix is still identity.
     untouched = policy._modelstorage.get_model()['matrix_a'][4]
     still_identity = (untouched == np.identity(2)).all()
     self.assertTrue(still_identity)

     second_contexts = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
     second_id, second_action = policy.get_action(second_contexts)
     policy.reward(second_id, 1)

     # The rewarded action's matrix must have moved away from identity.
     rewarded = policy._modelstorage.get_model()['matrix_a'][second_action]
     unchanged = (rewarded == np.identity(2)).all()
     self.assertFalse(unchanged)
Beispiel #7
0
 def test_reward_order_descending(self):
     """Rewarding only the later recommendation leaves the earlier one unrewarded."""
     policy = linucb.LinUCB(
         self.actions, self.historystorage, self.modelstorage, 1.00, 2)
     earlier_id, _ = policy.get_action([[1, 1], [2, 2], [3, 3]])
     later_id, _ = policy.get_action([[0, 0], [3, 3], [6, 6]])
     policy.reward(later_id, 1)

     # Both contexts are kept, each under its own history id.
     earlier_context = policy._historystorage.get_history(earlier_id).context
     self.assertEqual(earlier_context, [[1, 1], [2, 2], [3, 3]])
     later_context = policy._historystorage.get_history(later_id).context
     self.assertEqual(later_context, [[0, 0], [3, 3], [6, 6]])

     # Only the later recommendation carries a reward.
     earlier_reward = policy._historystorage.get_history(earlier_id).reward
     self.assertEqual(earlier_reward, None)
     later_reward = policy._historystorage.get_history(later_id).reward
     self.assertEqual(later_reward, 1)
def policy_generation(bandit, actions):
    """Build the bandit policy selected by name.

    A new ``history.MemoryHistoryStorage`` and ``model.MemoryModelStorage``
    are created on every call and handed to the policy constructor.

    Parameters
    ----------
    bandit : str
        One of 'Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1', 'Exp3' or
        'random'.
    actions
        Passed unchanged to the policy constructor as its action storage.

    Returns
    -------
    policy
        The constructed policy object, or the placeholder ``0`` when
        ``bandit`` is 'random'.

    Raises
    ------
    ValueError
        If ``bandit`` is not one of the supported names.
    """
    supported = ('Exp4P', 'LinUCB', 'LinThompSamp', 'UCB1', 'Exp3', 'random')
    if bandit not in supported:
        # An unknown name used to fall through every branch and fail on
        # ``return policy`` with an UnboundLocalError; fail early with a
        # message that names the offending value instead.
        raise ValueError(
            'Unknown bandit algorithm {!r}; expected one of {}'.format(
                bandit, ', '.join(supported)))

    historystorage = history.MemoryHistoryStorage()
    modelstorage = model.MemoryModelStorage()

    if bandit == 'Exp4P':
        return exp4p.Exp4P(historystorage,
                           modelstorage,
                           actions,
                           delta=0.5,
                           p_min=None)

    if bandit == 'LinUCB':
        return linucb.LinUCB(history_storage=historystorage,
                             model_storage=modelstorage,
                             action_storage=actions,
                             alpha=0.3,
                             context_dimension=18)

    if bandit == 'LinThompSamp':
        # NOTE: an earlier revision passed d=20 here; the value is now given
        # through the context_dimension keyword.
        return linthompsamp.LinThompSamp(
            historystorage,
            modelstorage,
            actions,
            context_dimension=18,
            delta=0.61,
            R=0.01,
            epsilon=0.71)

    if bandit == 'UCB1':
        return ucb1.UCB1(historystorage, modelstorage, actions)

    if bandit == 'Exp3':
        return exp3.Exp3(historystorage, modelstorage, actions, gamma=0.2)

    # Only 'random' is left; it has no policy object, so 0 is the placeholder.
    return 0
Beispiel #9
0
 def policy_evaluation(self, policy, context, desired_action, alpha):
     """Run LinUCB over ``self.t`` steps and return its cumulative error count.

     Parameters
     ----------
     policy : str
         Algorithm name. Only 'LinUCB' is evaluated; for any other value a
         message is printed and ``None`` is returned.
     context
         Indexed by step; ``context[t]`` is passed to ``get_action``.
     desired_action
         Indexed by step; ``desired_action[t][0]`` is compared with the
         action returned for that step.
     alpha
         Forwarded to the ``linucb.LinUCB`` constructor.

     Returns
     -------
     numpy.ndarray or None
         Array of shape ``(self.t, 1)`` whose entry ``t`` is the number of
         steps up to and including ``t`` where the returned action differed
         from the desired one.
     """
     if policy != 'LinUCB':
         print("We don't support other bandit algorithms now!")
         return None

     learner = linucb.LinUCB(self.actions,
                             history.MemoryHistoryStorage(),
                             model.MemoryModelStorage(),
                             alpha, self.d)
     cumulative_errors = np.zeros(shape=(self.t, 1))
     mistakes = 0.0
     for step in range(self.t):
         history_id, chosen = learner.get_action(context[step])
         missed = desired_action[step][0] != chosen
         if missed:
             learner.reward(history_id, 0)
             mistakes += 1.0
         else:
             learner.reward(history_id, 1)
         # Running total: unchanged on a hit, one higher on a miss.
         cumulative_errors[step] = mistakes
     return cumulative_errors
Beispiel #10
0
 def test_initialization(self):
     """Constructor arguments are exposed as _actions, alpha and d."""
     policy = linucb.LinUCB(
         self.actions,
         self.historystorage,
         self.modelstorage,
         1.00,
         2,
     )

     self.assertEqual(self.actions, policy._actions)
     self.assertEqual(1.00, policy.alpha)
     self.assertEqual(2, policy.d)