def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.001, 1, 0.03)
    ctr_tuning = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for gamma_i, gamma in enumerate(tuning_region):
        policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                      action_storage, gamma=gamma, random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[gamma_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    gamma_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="gamma changes")

    # Regret analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage, gamma=gamma_opt, random_state=random_state)
    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
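# --- Shared preamble (sketch) ---
# Each main() in this file comes from a separate example script.  The scripts
# are assumed to share a preamble like the one below; the import paths follow
# the striatum package layout these examples target and may need adjusting
# for other versions.  Each script is also assumed to end with the standard
# entry point:  if __name__ == '__main__': main()
import matplotlib.pyplot as plt
import numpy as np

from striatum import simulation
from striatum.bandit import Exp3, Exp4P, LinThompSamp, LinUCB, UCB1
from striatum.storage import (Action, MemoryActionStorage,
                              MemoryHistoryStorage, MemoryModelStorage)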
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Parameter tuning
    tuning_region = np.arange(0, 3, 0.05)
    ctr_tuning = np.empty(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for alpha_i, alpha in enumerate(tuning_region):
        policy = LinUCB(history_storage=MemoryHistoryStorage(),
                        model_storage=MemoryModelStorage(),
                        action_storage=action_storage,
                        context_dimension=context_dimension, alpha=alpha)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[alpha_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    alpha_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="alpha changes")

    # Regret analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = LinUCB(history_storage=MemoryHistoryStorage(),
                    model_storage=MemoryModelStorage(),
                    action_storage=action_storage,
                    context_dimension=context_dimension, alpha=alpha_opt)
    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
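# The tuning loops above rely on simulation.evaluate_policy to score a policy
# against the desired actions.  For reference, a minimal stand-in with the
# same interface might look like the sketch below -- a hypothetical
# re-implementation, not striatum's actual helper.  It replays every round,
# rewards the policy with 1 when it picks the desired action, and returns the
# cumulative regret per round.  It only covers the single-recommendation
# policies shown here; the Exp4P example uses a different get_action call.
def evaluate_policy_sketch(policy, context, desired_actions):
    n_rounds = len(context)
    cum_regret = np.empty(n_rounds)
    regret = 0
    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context[t])
        action_id = recommendation.action.id
        reward = 1 if action_id == desired_actions[t] else 0
        policy.reward(history_id, {action_id: reward})
        regret += 1 - reward
        cum_regret[t] = regret
    return cum_regret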
def main():  # pylint: disable=too-many-locals
    # Tuning data: the same 3000 simulated rounds are used both to train the
    # experts and to evaluate each candidate delta, so the CTR below is
    # normalised by this round count.
    n_rounds = 3000
    context_dimension = 5
    actions = [Action(i) for i in range(5)]
    action_ids = [0, 1, 2, 3, 4]
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, actions, "Exp4P", random_state=0)
    experts = train_expert(context1, desired_actions1)

    # Parameter tuning
    tuning_region = np.arange(0.01, 1, 0.05)
    ctr_tuning = np.empty(len(tuning_region))
    advice1 = get_advice(context1, action_ids, experts)
    for delta_i, delta in enumerate(tuning_region):
        historystorage = MemoryHistoryStorage()
        modelstorage = MemoryModelStorage()
        policy = Exp4P(actions, historystorage, modelstorage,
                       delta=delta, p_min=None)
        cum_regret = simulation.evaluate_policy(policy, advice1,
                                                desired_actions1)
        ctr_tuning[delta_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    delta_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="delta changes")

    # Regret analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, actions, "Exp4P", random_state=1)
    advice2 = get_advice(context2, action_ids, experts)
    historystorage = MemoryHistoryStorage()
    modelstorage = MemoryModelStorage()
    policy = Exp4P(actions, historystorage, modelstorage,
                   delta=delta_opt, p_min=None)
    for t in range(n_rounds):
        history_id, action = policy.get_action(advice2[t], 1)
        action_id = action[0]['action'].action_id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
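# The Exp4P example above calls train_expert and get_advice, which are not
# shown.  The sketches below are hypothetical implementations: they assume
# each simulated context is a 1-D feature vector indexable by round, and that
# Exp4P expects advice as a mapping of round -> expert -> action_id ->
# probability.  Adjust them if the real data layout differs.
from sklearn.linear_model import LogisticRegression


def train_expert(history_context, history_action):
    """Fit two simple classifiers (different regularisation) as experts."""
    n_rounds = len(history_context)
    contexts = np.array([history_context[t] for t in range(n_rounds)])
    actions = np.array([history_action[t] for t in range(n_rounds)])
    experts = [LogisticRegression(C=0.1), LogisticRegression(C=10.0)]
    for expert in experts:
        expert.fit(contexts, actions)
    return experts


def get_advice(context, action_ids, experts):
    """Convert each expert's class probabilities into per-action advice."""
    advice = {}
    for t in range(len(context)):
        advice[t] = {}
        features = np.asarray(context[t]).reshape(1, -1)
        for expert_i, expert in enumerate(experts):
            prob = expert.predict_proba(features)[0]
            # Assumes every action id appears in the training data, so the
            # classifier's class order matches the sorted action_ids.
            advice[t][expert_i] = dict(zip(action_ids, prob))
    return advice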
def main():
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Regret analysis
    n_rounds = 10000
    context, desired_actions = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = UCB1(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage)
    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context[t])
        action_id = recommendation.action.id
        if desired_actions[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
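# All of these examples draw their data from simulation.simulate_data.  As a
# rough mental model only (a hypothetical stand-in, not striatum's actual
# generator): each round draws one context vector per action, and the
# "desired" action is picked by some fixed rule on those vectors -- here,
# simply the action whose context vector has the largest norm.
def simulate_data_sketch(n_rounds, context_dimension, action_storage,
                         random_state=None):
    random_state = np.random.RandomState(random_state)
    action_ids = list(action_storage.iterids())
    context, desired_actions = {}, {}
    for t in range(n_rounds):
        context[t] = {action_id: random_state.uniform(0, 1, context_dimension)
                      for action_id in action_ids}
        desired_actions[t] = max(
            action_ids, key=lambda aid: np.linalg.norm(context[t][aid]))
    return context, desired_actions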
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.01, 0.99, 0.1)
    ctr_delta = np.zeros(shape=len(tuning_region))
    ctr_r = np.zeros(shape=len(tuning_region))
    ctr_epsilon = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for param_i, param in enumerate(tuning_region):
        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=param, R=0.01, epsilon=0.5,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_delta[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5, R=param, epsilon=0.5,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_r[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5, R=0.01, epsilon=param,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_epsilon[param_i] = n_rounds - cum_regret[-1]

    ctr_delta /= n_rounds
    ctr_r /= n_rounds
    ctr_epsilon /= n_rounds
    delta_opt = tuning_region[np.argmax(ctr_delta)]
    r_opt = tuning_region[np.argmax(ctr_r)]
    epsilon_opt = tuning_region[np.argmax(ctr_epsilon)]

    # Plot the parameter tuning result
    plt.plot(tuning_region, ctr_delta, 'ro-',
             label="delta changes, R = 0.01, eps = 0.5")
    plt.plot(tuning_region, ctr_r, 'gs-',
             label="delta = 0.5, R changes, eps = 0.5")
    plt.plot(tuning_region, ctr_epsilon, 'b^-',
             label="delta = 0.5, R = 0.01, eps changes")
    plt.xlabel('parameter value')
    plt.ylabel('CTR')
    plt.legend(bbox_to_anchor=(1., 0.7))
    plt.ylim([0, 1])
    plt.title("Parameter Tuning Curve - LinThompSamp")
    plt.show()

    # Regret analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                          action_storage,
                          context_dimension=context_dimension,
                          delta=delta_opt, R=r_opt, epsilon=epsilon_opt,
                          random_state=random_state)
    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()