コード例 #1
0
def get_data():
    streaming_batch = pd.read_csv('streaming_batch.csv',
                                  sep='\t',
                                  names=['user_id'],
                                  engine='c')
    user_feature = pd.read_csv('user_feature.csv',
                               sep='\t',
                               header=0,
                               index_col=0,
                               engine='c')
    actions_id = list(
        pd.read_csv('actions.csv', sep='\t', header=0, engine='c')['movie_id'])
    reward_list = pd.read_csv('reward_list.csv',
                              sep='\t',
                              header=0,
                              engine='c')
    action_context = pd.read_csv('action_context.csv',
                                 sep='\t',
                                 header=0,
                                 engine='c')

    tempactions = []
    for key in actions_id:
        action = Action(key)
        tempactions.append(action)
    actions = MemoryActionStorage()
    actions.add(tempactions)
    return streaming_batch, user_feature, actions, reward_list, action_context
コード例 #2
0
 def setUp(self):
     super(TestUCB1, self).setUp()
     self.policy = UCB1(self.history_storage, self.model_storage,
                        self.action_storage)
     self.policy_with_empty_action_storage = UCB1(MemoryHistoryStorage(),
                                                  MemoryModelStorage(),
                                                  MemoryActionStorage())
コード例 #3
0
 def setUp(self):
     super(TestExp3, self).setUp()
     self.gamma = 0.5
     self.policy = Exp3(self.history_storage,
                        self.model_storage,
                        self.action_storage,
                        gamma=self.gamma)
     self.policy_with_empty_action_storage = Exp3(MemoryHistoryStorage(),
                                                  MemoryModelStorage(),
                                                  MemoryActionStorage(),
                                                  gamma=self.gamma)
コード例 #4
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.001, 1, 0.03)
    ctr_tuning = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)
    for gamma_i, gamma in enumerate(tuning_region):
        policy = Exp3(MemoryHistoryStorage(),
                      MemoryModelStorage(),
                      action_storage,
                      gamma=gamma,
                      random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[gamma_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    gamma_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region,
                                 ctr_tuning,
                                 label="gamma changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = Exp3(MemoryHistoryStorage(),
                  MemoryModelStorage(),
                  action_storage,
                  gamma=gamma_opt,
                  random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
コード例 #5
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Parameter tuning
    tuning_region = np.arange(0, 3, 0.05)
    ctr_tuning = np.empty(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)
    for alpha_i, alpha in enumerate(tuning_region):
        policy = LinUCB(history_storage=MemoryHistoryStorage(),
                        model_storage=MemoryModelStorage(),
                        action_storage=action_storage,
                        context_dimension=context_dimension,
                        alpha=alpha)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[alpha_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    alpha_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region,
                                 ctr_tuning,
                                 label="alpha changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = LinUCB(history_storage=MemoryHistoryStorage(),
                    model_storage=MemoryModelStorage(),
                    action_storage=action_storage,
                    context_dimension=context_dimension,
                    alpha=alpha_opt)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
コード例 #6
0
 def setUp(self):
     super(TestLinUCB, self).setUp()
     self.context_dimension = 2
     self.alpha = 1.
     self.policy = LinUCB(self.history_storage,
                          self.model_storage,
                          self.action_storage,
                          context_dimension=self.context_dimension,
                          alpha=self.alpha)
     self.policy_with_empty_action_storage = LinUCB(
         MemoryHistoryStorage(),
         MemoryModelStorage(),
         MemoryActionStorage(),
         context_dimension=self.context_dimension,
         alpha=self.alpha)
コード例 #7
0
 def setUp(self):
     super(TestLinThompSamp, self).setUp()
     self.context_dimension = 2
     self.delta = 0.5
     self.R = 0.5  # pylint: disable=invalid-name
     self.epsilon = 0.1
     self.policy = LinThompSamp(self.history_storage,
                                self.model_storage,
                                self.action_storage,
                                context_dimension=self.context_dimension,
                                delta=self.delta,
                                R=self.R,
                                epsilon=self.epsilon)
     self.policy_with_empty_action_storage = LinThompSamp(
         MemoryHistoryStorage(),
         MemoryModelStorage(),
         MemoryActionStorage(),
         context_dimension=self.context_dimension,
         delta=self.delta,
         R=self.R,
         epsilon=self.epsilon)
コード例 #8
0
def main():
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Regret Analysis
    n_rounds = 10000
    context, desired_actions = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = UCB1(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context[t])
        action_id = recommendation.action.id
        if desired_actions[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
コード例 #9
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.01, 0.99, 0.1)
    ctr_delta = np.zeros(shape=len(tuning_region))
    ctr_r = np.zeros(shape=len(tuning_region))
    ctr_epsilon = np.zeros(shape=len(tuning_region))

    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)

    for param_i, param in enumerate(tuning_region):
        policy = LinThompSamp(MemoryHistoryStorage(),
                              MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=param,
                              R=0.01,
                              epsilon=0.5,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_delta[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(),
                              MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5,
                              R=param,
                              epsilon=0.5,
                              random_state=random_state)

        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_r[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(),
                              MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5,
                              R=0.01,
                              epsilon=param,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_epsilon[param_i] = n_rounds - cum_regret[-1]

    ctr_delta /= n_rounds
    ctr_r /= n_rounds
    ctr_epsilon /= n_rounds

    delta_opt = tuning_region[np.argmax(ctr_delta)]
    r_opt = tuning_region[np.argmax(ctr_r)]
    epsilon_opt = tuning_region[np.argmax(ctr_epsilon)]

    # Plot the parameter tuning result
    plt.plot(np.arange(0.01, 0.99, 0.1),
             ctr_delta,
             'ro-',
             label="delta changes, R = 0.01, eps = 0.5")
    plt.plot(np.arange(0.01, 0.99, 0.1),
             ctr_r,
             'gs-',
             label="delta = 0.5, R = changes, eps = 0.5")
    plt.plot(np.arange(0.01, 0.99, 0.1),
             ctr_epsilon,
             'b^-',
             label="delta = 0.5, R = 0.01, eps = changes")
    plt.xlabel('parameter value')
    plt.ylabel('CTR')
    plt.legend(bbox_to_anchor=(1., 0.7))
    plt.ylim([0, 1])
    plt.title("Parameter Tunning Curve - LinThompSamp")
    plt.show()

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = LinThompSamp(MemoryHistoryStorage(),
                          MemoryModelStorage(),
                          action_storage,
                          context_dimension=context_dimension,
                          delta=delta_opt,
                          R=r_opt,
                          epsilon=epsilon_opt,
                          random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
コード例 #10
0
ファイル: base_bandit_test.py プロジェクト: shampp/qr_cmab
 def setUp(self):  # pylint: disable=invalid-name
     self.model_storage = MemoryModelStorage()
     self.history_storage = MemoryHistoryStorage()
     self.action_storage = MemoryActionStorage()
     self.actions = [Action(i + 1) for i in range(3)]
     self.action_storage.add(self.actions)