def test_add_action(self):
    policy = self.policy
    history_id, _ = policy.get_action(context=None, n_actions=2)
    new_actions = [Action() for _ in range(2)]
    policy.add_action(new_actions)
    self.assertEqual(len(new_actions) + len(self.actions),
                     policy._action_storage.count())

    policy.reward(history_id, {3: 1})
    model = policy._model_storage.get_model()
    for action in new_actions:
        # Newly added, not-yet-rewarded actions keep their initial values:
        # one pseudo-round with reward 1.0 (as asserted below).
        self.assertEqual(model['total_action_reward'][action.id], 1.0)
        self.assertEqual(model['action_times'][action.id], 1)
    self.assertEqual(model['n_rounds'],
                     len(self.actions) + len(new_actions) + 1)

    history_id2, recommendations = policy.get_action(context=None,
                                                     n_actions=4)
    self.assertEqual(len(recommendations), 4)
    policy.reward(history_id2, {
        new_actions[0].id: 4,
        new_actions[1].id: 5,
    })
    model = policy._model_storage.get_model()
    for action in new_actions:
        self.assertNotEqual(model['total_action_reward'][action.id], 1.0)
        self.assertEqual(model['action_times'][action.id], 2)
    self.assertEqual(model['n_rounds'],
                     len(self.actions) + len(new_actions) + 1 + 2)
def get_data():
    streaming_batch = pd.read_csv('streaming_batch.csv', sep='\t',
                                  names=['user_id'], engine='c')
    user_feature = pd.read_csv('user_feature.csv', sep='\t', header=0,
                               index_col=0, engine='c')
    actions_id = list(
        pd.read_csv('actions.csv', sep='\t', header=0,
                    engine='c')['movie_id'])
    reward_list = pd.read_csv('reward_list.csv', sep='\t', header=0,
                              engine='c')
    action_context = pd.read_csv('action_context.csv', sep='\t', header=0,
                                 engine='c')

    actions = MemoryActionStorage()
    actions.add([Action(movie_id) for movie_id in actions_id])
    return streaming_batch, user_feature, actions, reward_list, action_context
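
# Hedged usage sketch (not part of the original snippets): one way the values
# returned by get_data() might drive a contextual bandit over the MovieLens
# streaming batch. The function name run_movielens_example, the column names
# 'user_id' and 'movie_id', the user_feature index layout, the choice of
# LinUCB, and the alpha value are illustrative assumptions only.
def run_movielens_example():
    streaming_batch, user_feature, actions, reward_list, _ = get_data()
    policy = LinUCB(history_storage=MemoryHistoryStorage(),
                    model_storage=MemoryModelStorage(),
                    action_storage=actions,
                    context_dimension=user_feature.shape[1],
                    alpha=0.3)
    for user_id in streaming_batch['user_id']:
        feature = user_feature.loc[user_id].values
        # Offer the same user-feature vector as the context of every action.
        context = {action_id: feature for action_id in actions.iterids()}
        history_id, recommendations = policy.get_action(context, n_actions=1)
        chosen_id = recommendations[0].action.id
        watched = set(
            reward_list[reward_list['user_id'] == user_id]['movie_id'])
        # Reward 1 if the recommended movie is in the user's watched list.
        reward = 1.0 if chosen_id in watched else 0.0
        policy.reward(history_id, {chosen_id: reward})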
def test_add_action(self):
    policy = self.policy
    context1 = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
    history_id, _ = policy.get_action(context1, 2)
    new_actions = [Action() for _ in range(2)]
    policy.add_action(new_actions)
    self.assertEqual(len(new_actions) + len(self.actions),
                     policy._action_storage.count())

    policy.reward(history_id, {3: 1})
    model = policy._model_storage.get_model()
    for action in new_actions:
        # Unrewarded new actions keep their freshly initialized covariance.
        self.assertTrue((model['A'][action.id]
                         == np.identity(self.context_dimension)).all())

    context2 = {1: [1, 1], 2: [2, 2], 3: [3, 3], 4: [4, 4], 5: [5, 5]}
    history_id2, recommendations = policy.get_action(context2, 4)
    self.assertEqual(len(recommendations), 4)
    policy.reward(history_id2, {
        new_actions[0].id: 4,
        new_actions[1].id: 5,
    })
    model = policy._model_storage.get_model()
    for action in new_actions:
        self.assertFalse((model['A'][action.id]
                          == np.identity(self.context_dimension)).all())
def test_add_action(self):
    policy = self.policy
    history_id, recommendations = policy.get_action(context=None,
                                                    n_actions=2)
    new_actions = [Action() for _ in range(2)]
    policy.add_action(new_actions)
    self.assertEqual(len(new_actions) + len(self.actions),
                     policy._action_storage.count())

    policy.reward(history_id, {recommendations[0].action.id: 1.})
    model = policy._model_storage.get_model()
    for action in new_actions:
        self.assertEqual(model['w'][action.id], 1.0)

    # n_actions=-1 requests recommendations over every stored action,
    # as the length assertion below checks.
    history_id2, recommendations2 = policy.get_action(context=None,
                                                      n_actions=-1)
    self.assertEqual(len(recommendations2),
                     len(new_actions) + len(self.actions))
    policy.reward(history_id2, {
        new_actions[0].id: 4,
        new_actions[1].id: 5,
    })
    model = policy._model_storage.get_model()
    for action in new_actions:
        self.assertGreater(model['w'][action.id], 1.0)
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.001, 1, 0.03)
    ctr_tuning = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for gamma_i, gamma in enumerate(tuning_region):
        policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                      action_storage, gamma=gamma, random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[gamma_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    gamma_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="gamma changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage, gamma=gamma_opt, random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        # Reward 1 when the recommended action matches the desired one.
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Parameter tuning
    tuning_region = np.arange(0, 3, 0.05)
    ctr_tuning = np.empty(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for alpha_i, alpha in enumerate(tuning_region):
        policy = LinUCB(history_storage=MemoryHistoryStorage(),
                        model_storage=MemoryModelStorage(),
                        action_storage=action_storage,
                        context_dimension=context_dimension,
                        alpha=alpha)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[alpha_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    alpha_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="alpha changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = LinUCB(history_storage=MemoryHistoryStorage(),
                    model_storage=MemoryModelStorage(),
                    action_storage=action_storage,
                    context_dimension=context_dimension,
                    alpha=alpha_opt)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
def test_add_action(self):
    policy = self.policy
    context1 = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
    history_id, _ = policy.get_action(context1, 2)
    new_actions = [Action() for _ in range(2)]
    policy.add_action(new_actions)
    self.assertEqual(len(new_actions) + len(self.actions),
                     policy._action_storage.count())
    policy.reward(history_id, {3: 1})

    context2 = {1: [1, 1], 2: [2, 2], 3: [3, 3], 4: [4, 4], 5: [5, 5]}
    history_id2, recommendations = policy.get_action(context2, 4)
    self.assertEqual(len(recommendations), 4)
    policy.reward(history_id2, {
        new_actions[0].id: 4,
        new_actions[1].id: 5,
    })
def main():
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Regret Analysis
    n_rounds = 10000
    context, desired_actions = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = UCB1(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context[t])
        action_id = recommendation.action.id
        if desired_actions[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
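
# The main() functions in this section are presumably executed as standalone
# example scripts; a minimal entry-point guard (assumed, not present in the
# extracted snippets) would be:
if __name__ == '__main__':
    main()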
def main():
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning
    tuning_region = np.arange(0.01, 0.99, 0.1)
    ctr_delta = np.zeros(shape=len(tuning_region))
    ctr_r = np.zeros(shape=len(tuning_region))
    ctr_epsilon = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for param_i, param in enumerate(tuning_region):
        # Vary delta while R and epsilon stay fixed.
        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=param, R=0.01, epsilon=0.5,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_delta[param_i] = n_rounds - cum_regret[-1]

        # Vary R while delta and epsilon stay fixed.
        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5, R=param, epsilon=0.5,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_r[param_i] = n_rounds - cum_regret[-1]

        # Vary epsilon while delta and R stay fixed.
        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=0.5, R=0.01, epsilon=param,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_epsilon[param_i] = n_rounds - cum_regret[-1]

    ctr_delta /= n_rounds
    ctr_r /= n_rounds
    ctr_epsilon /= n_rounds
    delta_opt = tuning_region[np.argmax(ctr_delta)]
    r_opt = tuning_region[np.argmax(ctr_r)]
    epsilon_opt = tuning_region[np.argmax(ctr_epsilon)]

    # Plot the parameter tuning result
    plt.plot(tuning_region, ctr_delta, 'ro-',
             label="delta changes, R = 0.01, eps = 0.5")
    plt.plot(tuning_region, ctr_r, 'gs-',
             label="delta = 0.5, R changes, eps = 0.5")
    plt.plot(tuning_region, ctr_epsilon, 'b^-',
             label="delta = 0.5, R = 0.01, eps changes")
    plt.xlabel('parameter value')
    plt.ylabel('CTR')
    plt.legend(bbox_to_anchor=(1., 0.7))
    plt.ylim([0, 1])
    plt.title("Parameter Tuning Curve - LinThompSamp")
    plt.show()

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                          action_storage,
                          context_dimension=context_dimension,
                          delta=delta_opt, R=r_opt, epsilon=epsilon_opt,
                          random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
def main():  # pylint: disable=too-many-locals
    n_rounds = 1000
    context_dimension = 5
    actions = [Action(i) for i in range(5)]
    action_ids = [0, 1, 2, 3, 4]
    context1, desired_actions1 = simulation.simulate_data(
        3000, context_dimension, actions, "Exp4P", random_state=0)
    experts = train_expert(context1, desired_actions1)

    # Parameter tuning
    tuning_region = np.arange(0.01, 1, 0.05)
    ctr_tuning = np.empty(len(tuning_region))
    advice1 = get_advice(context1, action_ids, experts)
    for delta_i, delta in enumerate(tuning_region):
        historystorage = MemoryHistoryStorage()
        modelstorage = MemoryModelStorage()
        policy = Exp4P(actions, historystorage, modelstorage, delta=delta,
                       p_min=None)
        cum_regret = simulation.evaluate_policy(policy, advice1,
                                                desired_actions1)
        ctr_tuning[delta_i] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    delta_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="delta changes")

    # Regret Analysis
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, actions, "Exp4P", random_state=1)
    advice2 = get_advice(context2, action_ids, experts)
    historystorage = MemoryHistoryStorage()
    modelstorage = MemoryModelStorage()
    policy = Exp4P(actions, historystorage, modelstorage, delta=delta_opt,
                   p_min=None)

    for t in range(n_rounds):
        history_id, action = policy.get_action(advice2[t], 1)
        action_id = action[0]['action'].action_id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
def test_add_action_from_empty_change_storage(self):
    policy = self.policy_with_empty_action_storage
    new_actions = [Action() for _ in range(2)]
    policy.add_action(new_actions)
    self.assertEqual(set(a.id for a in new_actions),
                     set(policy._action_storage.iterids()))
def test_add_action_change_storage(self):
    policy = self.policy
    new_actions = [Action() for _ in range(2)]
    policy.add_action(new_actions)
    self.assertEqual(set(a.id for a in self.actions + new_actions),
                     set(self.action_storage.iterids()))
def setUp(self):  # pylint: disable=invalid-name
    self.model_storage = MemoryModelStorage()
    self.history_storage = MemoryHistoryStorage()
    self.action_storage = MemoryActionStorage()
    self.actions = [Action(i + 1) for i in range(3)]
    self.action_storage.add(self.actions)
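
# Hedged sketch (not part of the extracted snippets): the test methods above
# use self.policy, which this shared setUp never constructs. A concrete test
# case presumably completes setUp by building its own policy; for example,
# for UCB1, whose positional argument order is assumed to match the UCB1
# example above:
#
#     self.policy = UCB1(self.history_storage, self.model_storage,
#                        self.action_storage)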