Example #1
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.001, 1, 0.03)
    ctr_tuning = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)
    for gamma_i, gamma in enumerate(tuning_region):
        policy = Exp3(MemoryHistoryStorage(),
                      MemoryModelStorage(),
                      action_storage,
                      gamma=gamma,
                      random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[gamma_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    gamma_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region,
                                 ctr_tuning,
                                 label="gamma changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = Exp3(MemoryHistoryStorage(),
                  MemoryModelStorage(),
                  action_storage,
                  gamma=gamma_opt,
                  random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Example #2
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Parameter tuning
    tuning_region = np.arange(0, 3, 0.05)
    ctr_tuning = np.empty(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)
    for alpha_i, alpha in enumerate(tuning_region):
        policy = LinUCB(history_storage=MemoryHistoryStorage(),
                        model_storage=MemoryModelStorage(),
                        action_storage=action_storage,
                        context_dimension=context_dimension,
                        alpha=alpha)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[alpha_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    alpha_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region,
                                 ctr_tuning,
                                 label="alpha changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = LinUCB(history_storage=MemoryHistoryStorage(),
                    model_storage=MemoryModelStorage(),
                    action_storage=action_storage,
                    context_dimension=context_dimension,
                    alpha=alpha_opt)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Example #3
0
 def setUp(self):
     super(TestUCB1, self).setUp()
     self.policy = UCB1(self.history_storage, self.model_storage,
                        self.action_storage)
     self.policy_with_empty_action_storage = UCB1(MemoryHistoryStorage(),
                                                  MemoryModelStorage(),
                                                  MemoryActionStorage())
Example #4
0
 def setUp(self):
     super(TestExp3, self).setUp()
     self.gamma = 0.5
     self.policy = Exp3(self.history_storage,
                        self.model_storage,
                        self.action_storage,
                        gamma=self.gamma)
     self.policy_with_empty_action_storage = Exp3(MemoryHistoryStorage(),
                                                  MemoryModelStorage(),
                                                  MemoryActionStorage(),
                                                  gamma=self.gamma)
Example #5
0
 def setUp(self):
     super(TestLinUCB, self).setUp()
     self.context_dimension = 2
     self.alpha = 1.
     self.policy = LinUCB(self.history_storage,
                          self.model_storage,
                          self.action_storage,
                          context_dimension=self.context_dimension,
                          alpha=self.alpha)
     self.policy_with_empty_action_storage = LinUCB(
         MemoryHistoryStorage(),
         MemoryModelStorage(),
         MemoryActionStorage(),
         context_dimension=self.context_dimension,
         alpha=self.alpha)
Example #6
0
 def setUp(self):
     super(TestLinThompSamp, self).setUp()
     self.context_dimension = 2
     self.delta = 0.5
     self.R = 0.5  # pylint: disable=invalid-name
     self.epsilon = 0.1
     self.policy = LinThompSamp(self.history_storage,
                                self.model_storage,
                                self.action_storage,
                                context_dimension=self.context_dimension,
                                delta=self.delta,
                                R=self.R,
                                epsilon=self.epsilon)
     self.policy_with_empty_action_storage = LinThompSamp(
         MemoryHistoryStorage(),
         MemoryModelStorage(),
         MemoryActionStorage(),
         context_dimension=self.context_dimension,
         delta=self.delta,
         R=self.R,
         epsilon=self.epsilon)
Example #7
0
def main():
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Regret Analysis
    n_rounds = 10000
    context, desired_actions = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = UCB1(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context[t])
        action_id = recommendation.action.id
        if desired_actions[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Example #8
0
def main():  # pylint: disable=too-many-locals
    n_rounds = 1000
    context_dimension = 5
    actions = [Action(i) for i in range(5)]

    action_ids = [0, 1, 2, 3, 4]
    context1, desired_actions1 = simulation.simulate_data(3000,
                                                          context_dimension,
                                                          actions,
                                                          "Exp4P",
                                                          random_state=0)
    experts = train_expert(context1, desired_actions1)

    # Parameter tuning
    tuning_region = np.arange(0.01, 1, 0.05)
    ctr_tuning = np.empty(len(tuning_region))
    advice1 = get_advice(context1, action_ids, experts)

    for delta_i, delta in enumerate(tuning_region):
        historystorage = MemoryHistoryStorage()
        modelstorage = MemoryModelStorage()
        policy = Exp4P(actions,
                       historystorage,
                       modelstorage,
                       delta=delta,
                       p_min=None)
        cum_regret = simulation.evaluate_policy(policy, advice1,
                                                desired_actions1)
        ctr_tuning[delta_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    delta_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region,
                                 ctr_tuning,
                                 label="delta changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          actions,
                                                          "Exp4P",
                                                          random_state=1)
    advice2 = get_advice(context2, action_ids, experts)
    historystorage = MemoryHistoryStorage()
    modelstorage = MemoryModelStorage()
    policy = Exp4P(actions,
                   historystorage,
                   modelstorage,
                   delta=delta_opt,
                   p_min=None)

    for t in range(n_rounds):
        history_id, action = policy.get_action(advice2[t], 1)
        action_id = action[0]['action'].action_id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Example #9
0
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.01, 0.99, 0.1)
    ctr_delta = np.zeros(shape=len(tuning_region))
    ctr_r = np.zeros(shape=len(tuning_region))
    ctr_epsilon = np.zeros(shape=len(tuning_region))

    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)

    for param_i, param in enumerate(tuning_region):
        policy = LinThompSamp(MemoryHistoryStorage(),
                              MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=param,
                              R=0.01,
                              epsilon=0.5,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_delta[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(),
                              MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5,
                              R=param,
                              epsilon=0.5,
                              random_state=random_state)

        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_r[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(),
                              MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5,
                              R=0.01,
                              epsilon=param,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_epsilon[param_i] = n_rounds - cum_regret[-1]

    ctr_delta /= n_rounds
    ctr_r /= n_rounds
    ctr_epsilon /= n_rounds

    delta_opt = tuning_region[np.argmax(ctr_delta)]
    r_opt = tuning_region[np.argmax(ctr_r)]
    epsilon_opt = tuning_region[np.argmax(ctr_epsilon)]

    # Plot the parameter tuning result
    plt.plot(np.arange(0.01, 0.99, 0.1),
             ctr_delta,
             'ro-',
             label="delta changes, R = 0.01, eps = 0.5")
    plt.plot(np.arange(0.01, 0.99, 0.1),
             ctr_r,
             'gs-',
             label="delta = 0.5, R = changes, eps = 0.5")
    plt.plot(np.arange(0.01, 0.99, 0.1),
             ctr_epsilon,
             'b^-',
             label="delta = 0.5, R = 0.01, eps = changes")
    plt.xlabel('parameter value')
    plt.ylabel('CTR')
    plt.legend(bbox_to_anchor=(1., 0.7))
    plt.ylim([0, 1])
    plt.title("Parameter Tunning Curve - LinThompSamp")
    plt.show()

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = LinThompSamp(MemoryHistoryStorage(),
                          MemoryModelStorage(),
                          action_storage,
                          context_dimension=context_dimension,
                          delta=delta_opt,
                          R=r_opt,
                          epsilon=epsilon_opt,
                          random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Example #10
0
 def setUp(self):  # pylint: disable=invalid-name
     self.model_storage = MemoryModelStorage()
     self.history_storage = MemoryHistoryStorage()
     self.action_storage = MemoryActionStorage()
     self.actions = [Action(i + 1) for i in range(3)]
     self.action_storage.add(self.actions)