def get_data():
    """Load the MovieLens-style experiment data from local TSV files.

    Reads five tab-separated files from the working directory and wraps the
    movie ids in an in-memory action storage.

    Returns:
        tuple: (streaming_batch, user_feature, actions, reward_list,
        action_context) where ``actions`` is a populated
        ``MemoryActionStorage`` and the rest are pandas DataFrames.
    """
    streaming_batch = pd.read_csv('streaming_batch.csv', sep='\t',
                                  names=['user_id'], engine='c')
    user_feature = pd.read_csv('user_feature.csv', sep='\t', header=0,
                               index_col=0, engine='c')
    actions_id = list(
        pd.read_csv('actions.csv', sep='\t', header=0, engine='c')['movie_id'])
    reward_list = pd.read_csv('reward_list.csv', sep='\t', header=0,
                              engine='c')
    action_context = pd.read_csv('action_context.csv', sep='\t', header=0,
                                 engine='c')
    actions = MemoryActionStorage()
    # Build Action objects in one pass instead of the append loop.
    actions.add([Action(key) for key in actions_id])
    return streaming_batch, user_feature, actions, reward_list, action_context
def setUp(self):
    """Create a UCB1 policy over the shared fixtures, plus one backed by
    empty storages for the no-action edge case."""
    super(TestUCB1, self).setUp()
    self.policy = UCB1(
        self.history_storage, self.model_storage, self.action_storage)
    empty_policy = UCB1(
        MemoryHistoryStorage(), MemoryModelStorage(), MemoryActionStorage())
    self.policy_with_empty_action_storage = empty_policy
def setUp(self):
    """Create an Exp3 policy (gamma=0.5) over the shared fixtures, plus one
    backed by empty storages for the no-action edge case."""
    super(TestExp3, self).setUp()
    self.gamma = 0.5
    self.policy = Exp3(self.history_storage, self.model_storage,
                       self.action_storage, gamma=self.gamma)
    empty_policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                        MemoryActionStorage(), gamma=self.gamma)
    self.policy_with_empty_action_storage = empty_policy
def main():
    """Tune Exp3's gamma on simulated data, then plot the average regret of
    the best gamma over a longer run."""
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning: sweep gamma and record the CTR achieved by each.
    tuning_region = np.arange(0.001, 1, 0.03)
    ctr_tuning = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for gamma_i, gamma in enumerate(tuning_region):
        policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                      action_storage, gamma=gamma, random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[gamma_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    gamma_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="gamma changes")

    # Regret analysis: run the best gamma on fresh data and plot regret.
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage, gamma=gamma_opt, random_state=random_state)
    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        # Reward 1 on a match, 0 otherwise; int(...) replaces the if/else.
        policy.reward(history_id,
                      {action_id: int(desired_actions2[t] == action_id)})
    policy.plot_avg_regret()
    plt.show()
def main():
    """Tune LinUCB's alpha on simulated data, then plot the average regret of
    the best alpha over a longer run."""
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Parameter tuning: sweep alpha and record the CTR achieved by each.
    tuning_region = np.arange(0, 3, 0.05)
    ctr_tuning = np.empty(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for alpha_i, alpha in enumerate(tuning_region):
        policy = LinUCB(history_storage=MemoryHistoryStorage(),
                        model_storage=MemoryModelStorage(),
                        action_storage=action_storage,
                        context_dimension=context_dimension, alpha=alpha)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[alpha_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    alpha_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="alpha changes")

    # Regret analysis: run the best alpha on fresh data and plot regret.
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = LinUCB(history_storage=MemoryHistoryStorage(),
                    model_storage=MemoryModelStorage(),
                    action_storage=action_storage,
                    context_dimension=context_dimension, alpha=alpha_opt)
    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        # Reward 1 on a match, 0 otherwise; int(...) replaces the if/else.
        policy.reward(history_id,
                      {action_id: int(desired_actions2[t] == action_id)})
    policy.plot_avg_regret()
    plt.show()
def setUp(self):
    """Create a LinUCB policy (2-d context, alpha=1.0) over the shared
    fixtures, plus one backed by empty storages."""
    super(TestLinUCB, self).setUp()
    self.context_dimension = 2
    self.alpha = 1.
    self.policy = LinUCB(self.history_storage, self.model_storage,
                         self.action_storage,
                         context_dimension=self.context_dimension,
                         alpha=self.alpha)
    empty_policy = LinUCB(MemoryHistoryStorage(), MemoryModelStorage(),
                          MemoryActionStorage(),
                          context_dimension=self.context_dimension,
                          alpha=self.alpha)
    self.policy_with_empty_action_storage = empty_policy
def setUp(self):
    """Create a LinThompSamp policy (2-d context, delta=0.5, R=0.5,
    epsilon=0.1) over the shared fixtures, plus one backed by empty
    storages."""
    super(TestLinThompSamp, self).setUp()
    self.context_dimension = 2
    self.delta = 0.5
    self.R = 0.5  # pylint: disable=invalid-name
    self.epsilon = 0.1
    self.policy = LinThompSamp(self.history_storage, self.model_storage,
                               self.action_storage,
                               context_dimension=self.context_dimension,
                               delta=self.delta, R=self.R,
                               epsilon=self.epsilon)
    empty_policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                                MemoryActionStorage(),
                                context_dimension=self.context_dimension,
                                delta=self.delta, R=self.R,
                                epsilon=self.epsilon)
    self.policy_with_empty_action_storage = empty_policy
def main():
    """Run UCB1 (no parameters to tune) on simulated data and plot its
    average regret."""
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Regret analysis over a single long run.
    n_rounds = 10000
    context, desired_actions = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = UCB1(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage)
    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context[t])
        action_id = recommendation.action.id
        # Reward 1 on a match, 0 otherwise; int(...) replaces the if/else.
        policy.reward(history_id,
                      {action_id: int(desired_actions[t] == action_id)})
    policy.plot_avg_regret()
    plt.show()
def main():
    """Tune LinThompSamp's delta, R and epsilon one at a time on simulated
    data, then plot the average regret of the best combination."""
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning: vary one parameter per sweep, holding the others at
    # the fixed values named in the plot labels below.
    tuning_region = np.arange(0.01, 0.99, 0.1)
    ctr_delta = np.zeros(shape=len(tuning_region))
    ctr_r = np.zeros(shape=len(tuning_region))
    ctr_epsilon = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for param_i, param in enumerate(tuning_region):
        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=param, R=0.01, epsilon=0.5,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_delta[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5, R=param, epsilon=0.5,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_r[param_i] = n_rounds - cum_regret[-1]

        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5, R=0.01, epsilon=param,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_epsilon[param_i] = n_rounds - cum_regret[-1]

    ctr_delta /= n_rounds
    ctr_r /= n_rounds
    ctr_epsilon /= n_rounds

    delta_opt = tuning_region[np.argmax(ctr_delta)]
    r_opt = tuning_region[np.argmax(ctr_r)]
    epsilon_opt = tuning_region[np.argmax(ctr_epsilon)]

    # Plot the parameter tuning result. Reuse tuning_region for the x axis
    # instead of re-spelling np.arange(0.01, 0.99, 0.1) three times.
    plt.plot(tuning_region, ctr_delta, 'ro-',
             label="delta changes, R = 0.01, eps = 0.5")
    plt.plot(tuning_region, ctr_r, 'gs-',
             label="delta = 0.5, R = changes, eps = 0.5")
    plt.plot(tuning_region, ctr_epsilon, 'b^-',
             label="delta = 0.5, R = 0.01, eps = changes")
    plt.xlabel('parameter value')
    plt.ylabel('CTR')
    plt.legend(bbox_to_anchor=(1., 0.7))
    plt.ylim([0, 1])
    # Fixed title typo: "Tunning" -> "Tuning".
    plt.title("Parameter Tuning Curve - LinThompSamp")
    plt.show()

    # Regret analysis: run the best parameters on fresh data and plot regret.
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                          action_storage,
                          context_dimension=context_dimension,
                          delta=delta_opt, R=r_opt, epsilon=epsilon_opt,
                          random_state=random_state)
    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        # Reward 1 on a match, 0 otherwise; int(...) replaces the if/else.
        policy.reward(history_id,
                      {action_id: int(desired_actions2[t] == action_id)})
    policy.plot_avg_regret()
    plt.show()
def setUp(self):  # pylint: disable=invalid-name
    """Create fresh in-memory storages and register actions with ids 1-3."""
    self.model_storage = MemoryModelStorage()
    self.history_storage = MemoryHistoryStorage()
    self.action_storage = MemoryActionStorage()
    self.actions = [Action(action_id) for action_id in (1, 2, 3)]
    self.action_storage.add(self.actions)