Beispiel #1
0
def get_data():
    """Load the streaming-evaluation data sets from tab-separated CSV files
    in the current working directory.

    Returns a 5-tuple:
        streaming_batch (pd.DataFrame): one ``user_id`` per row.
        user_feature (pd.DataFrame): per-user features, indexed by the
            first CSV column.
        actions (MemoryActionStorage): one ``Action`` per ``movie_id``
            found in ``actions.csv``.
        reward_list (pd.DataFrame): logged reward events.
        action_context (pd.DataFrame): context features for the actions.
    """
    streaming_batch = pd.read_csv('streaming_batch.csv', sep='\t',
                                  names=['user_id'], engine='c')
    user_feature = pd.read_csv('user_feature.csv', sep='\t', header=0,
                               index_col=0, engine='c')
    actions_id = list(
        pd.read_csv('actions.csv', sep='\t', header=0, engine='c')['movie_id'])
    reward_list = pd.read_csv('reward_list.csv', sep='\t', header=0,
                              engine='c')
    action_context = pd.read_csv('action_context.csv', sep='\t', header=0,
                                 engine='c')

    # Build every Action in one pass and register them together, instead of
    # appending to a temporary list in an explicit loop.
    actions = MemoryActionStorage()
    actions.add([Action(movie_id) for movie_id in actions_id])
    return streaming_batch, user_feature, actions, reward_list, action_context
Beispiel #2
0
def main():
    """Tune Exp3's gamma on a short simulation, then run a longer regret
    analysis with the best gamma found."""
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning: sweep gamma and record the CTR for each value.
    tuning_region = np.arange(0.001, 1, 0.03)
    ctr_tuning = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for idx, gamma in enumerate(tuning_region):
        policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                      action_storage, gamma=gamma, random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[idx] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    gamma_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="gamma changes")

    # Regret analysis on a longer run with the tuned gamma.
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = Exp3(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage, gamma=gamma_opt, random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        chosen = recommendation.action.id
        payoff = 1 if desired_actions2[t] == chosen else 0
        policy.reward(history_id, {chosen: payoff})

    policy.plot_avg_regret()
    plt.show()
def main():
    """Tune LinUCB's alpha on simulated data, then plot its average regret
    with the best alpha found."""
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])

    # Sweep alpha over [0, 3) and record the CTR achieved at each value.
    tuning_region = np.arange(0, 3, 0.05)
    ctr_tuning = np.empty(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)
    for idx, alpha in enumerate(tuning_region):
        policy = LinUCB(history_storage=MemoryHistoryStorage(),
                        model_storage=MemoryModelStorage(),
                        action_storage=action_storage,
                        context_dimension=context_dimension,
                        alpha=alpha)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[idx] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    alpha_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region, ctr_tuning,
                                 label="alpha changes")

    # Longer run with the tuned alpha for the regret analysis.
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = LinUCB(history_storage=MemoryHistoryStorage(),
                    model_storage=MemoryModelStorage(),
                    action_storage=action_storage,
                    context_dimension=context_dimension,
                    alpha=alpha_opt)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        chosen = recommendation.action.id
        payoff = 1 if desired_actions2[t] == chosen else 0
        policy.reward(history_id, {chosen: payoff})

    policy.plot_avg_regret()
    plt.show()
Beispiel #4
0
 def setUp(self):
     """Create a UCB1 policy plus one backed by an empty action storage."""
     super(TestUCB1, self).setUp()
     self.policy = UCB1(self.history_storage,
                        self.model_storage,
                        self.action_storage)
     self.policy_with_empty_action_storage = UCB1(
         MemoryHistoryStorage(), MemoryModelStorage(), MemoryActionStorage())
Beispiel #5
0
 def setUp(self):
     """Build an Exp3 policy (gamma=0.5) and one with no actions stored."""
     super(TestExp3, self).setUp()
     self.gamma = 0.5
     self.policy = Exp3(self.history_storage, self.model_storage,
                        self.action_storage, gamma=self.gamma)
     self.policy_with_empty_action_storage = Exp3(
         MemoryHistoryStorage(), MemoryModelStorage(), MemoryActionStorage(),
         gamma=self.gamma)
Beispiel #6
0
def main():
    """Tune Exp3's gamma on a short simulation, then run a longer regret
    analysis with the best gamma found."""
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning: measure CTR over a sweep of gamma values.
    tuning_region = np.arange(0.001, 1, 0.03)
    ctr_tuning = np.zeros(shape=len(tuning_region))
    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)
    for idx, gamma in enumerate(tuning_region):
        policy = Exp3(MemoryHistoryStorage(),
                      MemoryModelStorage(),
                      action_storage,
                      gamma=gamma,
                      random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        ctr_tuning[idx] = n_rounds - cum_regret[-1]
    ctr_tuning /= n_rounds
    gamma_opt = tuning_region[np.argmax(ctr_tuning)]
    simulation.plot_tuning_curve(tuning_region,
                                 ctr_tuning,
                                 label="gamma changes")

    # Regret analysis on a longer run with the tuned gamma.
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = Exp3(MemoryHistoryStorage(),
                  MemoryModelStorage(),
                  action_storage,
                  gamma=gamma_opt,
                  random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        chosen = recommendation.action.id
        policy.reward(history_id,
                      {chosen: 1 if desired_actions2[t] == chosen else 0})

    policy.plot_avg_regret()
    plt.show()
Beispiel #7
0
def main():
    """Run a 10000-round regret analysis of UCB1 on simulated data."""
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(aid) for aid in range(5)])

    # Regret analysis: reward the policy 1 whenever it recommends the
    # desired action for the round, 0 otherwise.
    n_rounds = 10000
    contexts, oracle_actions = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = UCB1(MemoryHistoryStorage(), MemoryModelStorage(),
                  action_storage)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(contexts[t])
        chosen = recommendation.action.id
        policy.reward(history_id,
                      {chosen: 1 if oracle_actions[t] == chosen else 0})

    policy.plot_avg_regret()
    plt.show()
Beispiel #8
0
 def setUp(self):
     """Build a LinUCB policy and one backed by an empty action storage."""
     super(TestLinUCB, self).setUp()
     self.context_dimension = 2
     self.alpha = 1.
     self.policy = LinUCB(self.history_storage, self.model_storage,
                          self.action_storage,
                          context_dimension=self.context_dimension,
                          alpha=self.alpha)
     self.policy_with_empty_action_storage = LinUCB(
         MemoryHistoryStorage(), MemoryModelStorage(), MemoryActionStorage(),
         context_dimension=self.context_dimension, alpha=self.alpha)
Beispiel #9
0
 def setUp(self):
     """Build a LinThompSamp policy and an empty-storage counterpart."""
     super(TestLinThompSamp, self).setUp()
     self.context_dimension = 2
     self.delta = 0.5
     self.R = 0.5  # pylint: disable=invalid-name
     self.epsilon = 0.1
     self.policy = LinThompSamp(self.history_storage, self.model_storage,
                                self.action_storage,
                                context_dimension=self.context_dimension,
                                delta=self.delta, R=self.R,
                                epsilon=self.epsilon)
     self.policy_with_empty_action_storage = LinThompSamp(
         MemoryHistoryStorage(), MemoryModelStorage(), MemoryActionStorage(),
         context_dimension=self.context_dimension, delta=self.delta,
         R=self.R, epsilon=self.epsilon)
Beispiel #10
0
def main():
    """Tune LinThompSamp's delta, R and epsilon one at a time on simulated
    data, plot the three tuning curves, then run a 10000-round regret
    analysis with the best value found for each parameter.
    """
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning: vary one parameter at a time, keeping the other
    # two fixed at delta=0.5, R=0.01, epsilon=0.5.
    tuning_region = np.arange(0.01, 0.99, 0.1)
    ctr_delta = np.zeros(shape=len(tuning_region))
    ctr_r = np.zeros(shape=len(tuning_region))
    ctr_epsilon = np.zeros(shape=len(tuning_region))

    context1, desired_actions1 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=0)

    def evaluate(delta, R, epsilon):
        """Evaluate a fresh LinThompSamp with these parameters and return
        its raw hit count (n_rounds minus final cumulative regret)."""
        policy = LinThompSamp(MemoryHistoryStorage(),
                              MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=delta,
                              R=R,
                              epsilon=epsilon,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        return n_rounds - cum_regret[-1]

    for param_i, param in enumerate(tuning_region):
        ctr_delta[param_i] = evaluate(delta=param, R=0.01, epsilon=0.5)
        ctr_r[param_i] = evaluate(delta=0.5, R=param, epsilon=0.5)
        ctr_epsilon[param_i] = evaluate(delta=0.5, R=0.01, epsilon=param)

    ctr_delta /= n_rounds
    ctr_r /= n_rounds
    ctr_epsilon /= n_rounds

    delta_opt = tuning_region[np.argmax(ctr_delta)]
    r_opt = tuning_region[np.argmax(ctr_r)]
    epsilon_opt = tuning_region[np.argmax(ctr_epsilon)]

    # Plot the parameter tuning result.  Reuse tuning_region as the x-axis
    # rather than recomputing the same np.arange three times.
    plt.plot(tuning_region,
             ctr_delta,
             'ro-',
             label="delta changes, R = 0.01, eps = 0.5")
    plt.plot(tuning_region,
             ctr_r,
             'gs-',
             label="delta = 0.5, R = changes, eps = 0.5")
    plt.plot(tuning_region,
             ctr_epsilon,
             'b^-',
             label="delta = 0.5, R = 0.01, eps = changes")
    plt.xlabel('parameter value')
    plt.ylabel('CTR')
    plt.legend(bbox_to_anchor=(1., 0.7))
    plt.ylim([0, 1])
    plt.title("Parameter Tunning Curve - LinThompSamp")
    plt.show()

    # Regret Analysis with the tuned parameters on a longer run.
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(n_rounds,
                                                          context_dimension,
                                                          action_storage,
                                                          random_state=1)
    policy = LinThompSamp(MemoryHistoryStorage(),
                          MemoryModelStorage(),
                          action_storage,
                          context_dimension=context_dimension,
                          delta=delta_opt,
                          R=r_opt,
                          epsilon=epsilon_opt,
                          random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()
Beispiel #11
0
 def setUp(self):  # pylint: disable=invalid-name
     """Create fresh in-memory storages and register actions 1..3."""
     self.model_storage = MemoryModelStorage()
     self.history_storage = MemoryHistoryStorage()
     self.action_storage = MemoryActionStorage()
     self.actions = [Action(action_id) for action_id in range(1, 4)]
     self.action_storage.add(self.actions)
Beispiel #12
0
class BaseBanditTest(object):
    """Shared behavioral tests for bandit policies.

    Mixed into concrete test cases, which must provide ``self.policy`` and
    ``self.policy_with_empty_action_storage`` in their own ``setUp``.
    """
    # pylint: disable=protected-access

    def setUp(self):  # pylint: disable=invalid-name
        """Create fresh in-memory storages and register three actions."""
        self.model_storage = MemoryModelStorage()
        self.history_storage = MemoryHistoryStorage()
        self.action_storage = MemoryActionStorage()
        self.actions = [Action(i + 1) for i in range(3)]
        self.action_storage.add(self.actions)

    def test_initialization(self):
        """The policy exposes the storages it was constructed with."""
        policy = self.policy
        self.assertEqual(self.model_storage, policy._model_storage)
        self.assertEqual(self.history_storage, policy._history_storage)
        self.assertEqual(self.history_storage, policy.history_storage)
        self.assertEqual(self.action_storage, policy._action_storage)

    def test_get_action_with_empty_storage(self):
        """With no actions stored, get_action returns zero recommendations
        but still records the context as unrewarded history."""
        policy = self.policy_with_empty_action_storage
        context = {}
        history_id, recommendations = policy.get_action(context, 1)
        self.assertEqual(history_id, 0)
        self.assertEqual(len(recommendations), 0)
        self.assertDictEqual(
            policy._history_storage.get_unrewarded_history(history_id).context,
            context)

    def test_get_first_action(self):
        """Requesting one action yields a single recommendation whose id is
        a stored action, and logs the context as unrewarded history."""
        policy = self.policy
        context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, recommendations = policy.get_action(context, 1)
        self.assertEqual(history_id, 0)
        self.assertEqual(len(recommendations), 1)
        self.assertIn(recommendations[0].action.id,
                      self.action_storage.iterids())
        self.assertEqual(
            policy._history_storage.get_unrewarded_history(history_id).context,
            context)

    def test_get_action_with_n_actions_none(self):
        """n_actions=None returns a bare Recommendation, not a list."""
        policy = self.policy
        context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, recommendations = policy.get_action(context, None)
        self.assertEqual(history_id, 0)
        self.assertIsInstance(recommendations, Recommendation)
        self.assertIn(recommendations.action.id,
                      self.action_storage.iterids())
        self.assertEqual(
            policy._history_storage.get_unrewarded_history(history_id).context,
            context)

    def test_get_all_action(self):
        """n_actions=-1 returns one recommendation per stored action."""
        policy = self.policy
        context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, recommendations = policy.get_action(context, -1)
        self.assertEqual(history_id, 0)
        self.assertEqual(len(recommendations), len(self.actions))
        for rec in recommendations:
            self.assertIn(rec.action.id, self.action_storage.iterids())
        self.assertEqual(
            policy._history_storage.get_unrewarded_history(history_id).context,
            context)

    def test_get_multiple_action(self):
        """Requesting n actions yields exactly n recommendations, all of
        whose ids are stored actions."""
        policy = self.policy
        n_actions = 2
        context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, recommendations = policy.get_action(context, n_actions)
        self.assertEqual(history_id, 0)
        self.assertEqual(len(recommendations), n_actions)
        for rec in recommendations:
            self.assertIn(rec.action.id, self.action_storage.iterids())
        self.assertEqual(
            policy._history_storage.get_unrewarded_history(history_id).context,
            context)

    def test_update_reward(self):
        """reward() stores the given rewards on the history entry."""
        policy = self.policy
        context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, recommendations = policy.get_action(context, 1)
        rewards = {recommendations[0].action.id: 1.}
        policy.reward(history_id, rewards)
        self.assertEqual(
            policy._history_storage.get_history(history_id).rewards, rewards)

    def test_delay_reward(self):
        """Rewarding the first of two pending histories leaves the second
        unrewarded with an empty rewards dict."""
        policy = self.policy
        context1 = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        context2 = {1: [0, 0], 2: [3, 3], 3: [6, 6]}
        history_id1, recommendations1 = policy.get_action(context1, 2)
        self.assertEqual(len(recommendations1), 2)
        history_id2, recommendations2 = policy.get_action(context2, 1)
        self.assertEqual(len(recommendations2), 1)

        rewards = {
            recommendations1[0].action.id: 0.,
            recommendations1[1].action.id: 1.,
        }
        policy.reward(history_id1, rewards)
        self.assertDictEqual(
            policy._history_storage.get_history(history_id1).context, context1)
        self.assertDictEqual(
            policy._history_storage.get_unrewarded_history(history_id2).context,
            context2)
        self.assertDictEqual(
            policy._history_storage.get_history(history_id1).rewards, rewards)
        self.assertDictEqual(
            policy._history_storage.get_unrewarded_history(history_id2).rewards,
            {})

    def test_reward_order_descending(self):
        """Rewarding the second history first leaves the first pending."""
        policy = self.policy
        context1 = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        context2 = {1: [0, 0], 2: [3, 3], 3: [6, 6]}
        history_id1, _ = policy.get_action(context1, 2)
        history_id2, recommendations2 = policy.get_action(context2)
        rewards = {recommendations2.action.id: 1.}
        policy.reward(history_id2, rewards)
        self.assertDictEqual(
            policy._history_storage.get_unrewarded_history(history_id1).context,
            context1)
        self.assertDictEqual(
            policy._history_storage.get_history(history_id2).context, context2)
        self.assertDictEqual(
            policy._history_storage.get_unrewarded_history(history_id1).rewards,
            {})
        self.assertDictEqual(
            policy._history_storage.get_history(history_id2).rewards, rewards)

    def test_update_action(self):
        """update_action() persists attribute changes to action storage."""
        action = self.actions[1]
        action.action_type = "text"
        action.action_text = "hello"
        self.policy.update_action(action)
        updated_action = self.action_storage.get(action.id)
        self.assertEqual(updated_action.action_type, action.action_type)
        self.assertEqual(updated_action.action_text, action.action_text)
Beispiel #13
0
 def setUp(self):  # pylint: disable=invalid-name
     """Create fresh in-memory storages and register actions 1..3."""
     self.model_storage = MemoryModelStorage()
     self.history_storage = MemoryHistoryStorage()
     self.action_storage = MemoryActionStorage()
     self.actions = [Action(action_id) for action_id in range(1, 4)]
     self.action_storage.add(self.actions)
Beispiel #14
0
class BaseBanditTest(object):
    """Shared behavioral tests for bandit policies.

    Mixed into concrete test cases, which must provide ``self.policy`` and
    ``self.policy_with_empty_action_storage`` in their own ``setUp``.
    """
    # pylint: disable=protected-access

    def setUp(self):  # pylint: disable=invalid-name
        """Create fresh in-memory storages and register three actions."""
        self.model_storage = MemoryModelStorage()
        self.history_storage = MemoryHistoryStorage()
        self.action_storage = MemoryActionStorage()
        self.actions = [Action(i + 1) for i in range(3)]
        self.action_storage.add(self.actions)

    def test_initialization(self):
        """The policy exposes the storages it was constructed with."""
        policy = self.policy
        self.assertEqual(self.model_storage, policy._model_storage)
        self.assertEqual(self.history_storage, policy._history_storage)
        self.assertEqual(self.history_storage, policy.history_storage)
        self.assertEqual(self.action_storage, policy._action_storage)

    def test_get_action_with_empty_storage(self):
        """With no actions stored, get_action returns zero recommendations
        but still records the context as unrewarded history."""
        policy = self.policy_with_empty_action_storage
        context = {}
        history_id, recommendations = policy.get_action(context, 1)
        self.assertEqual(history_id, 0)
        self.assertEqual(len(recommendations), 0)
        self.assertDictEqual(
            policy._history_storage.get_unrewarded_history(history_id).context,
            context)

    def test_get_first_action(self):
        """Requesting one action yields a single recommendation whose id is
        a stored action, and logs the context as unrewarded history."""
        policy = self.policy
        context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, recommendations = policy.get_action(context, 1)
        self.assertEqual(history_id, 0)
        self.assertEqual(len(recommendations), 1)
        self.assertIn(recommendations[0].action.id,
                      self.action_storage.iterids())
        self.assertEqual(
            policy._history_storage.get_unrewarded_history(history_id).context,
            context)

    def test_get_action_with_n_actions_none(self):
        """n_actions=None returns a bare Recommendation, not a list."""
        policy = self.policy
        context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, recommendations = policy.get_action(context, None)
        self.assertEqual(history_id, 0)
        self.assertIsInstance(recommendations, Recommendation)
        self.assertIn(recommendations.action.id, self.action_storage.iterids())
        self.assertEqual(
            policy._history_storage.get_unrewarded_history(history_id).context,
            context)

    def test_get_all_action(self):
        """n_actions=-1 returns one recommendation per stored action."""
        policy = self.policy
        context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, recommendations = policy.get_action(context, -1)
        self.assertEqual(history_id, 0)
        self.assertEqual(len(recommendations), len(self.actions))
        for rec in recommendations:
            self.assertIn(rec.action.id, self.action_storage.iterids())
        self.assertEqual(
            policy._history_storage.get_unrewarded_history(history_id).context,
            context)

    def test_get_multiple_action(self):
        """Requesting n actions yields exactly n recommendations, all of
        whose ids are stored actions."""
        policy = self.policy
        n_actions = 2
        context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, recommendations = policy.get_action(context, n_actions)
        self.assertEqual(history_id, 0)
        self.assertEqual(len(recommendations), n_actions)
        for rec in recommendations:
            self.assertIn(rec.action.id, self.action_storage.iterids())
        self.assertEqual(
            policy._history_storage.get_unrewarded_history(history_id).context,
            context)

    def test_update_reward(self):
        """reward() stores the given rewards on the history entry."""
        policy = self.policy
        context = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        history_id, recommendations = policy.get_action(context, 1)
        rewards = {recommendations[0].action.id: 1.}
        policy.reward(history_id, rewards)
        self.assertEqual(
            policy._history_storage.get_history(history_id).rewards, rewards)

    def test_delay_reward(self):
        """Rewarding the first of two pending histories leaves the second
        unrewarded with an empty rewards dict."""
        policy = self.policy
        context1 = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        context2 = {1: [0, 0], 2: [3, 3], 3: [6, 6]}
        history_id1, recommendations1 = policy.get_action(context1, 2)
        self.assertEqual(len(recommendations1), 2)
        history_id2, recommendations2 = policy.get_action(context2, 1)
        self.assertEqual(len(recommendations2), 1)

        rewards = {
            recommendations1[0].action.id: 0.,
            recommendations1[1].action.id: 1.,
        }
        policy.reward(history_id1, rewards)
        self.assertDictEqual(
            policy._history_storage.get_history(history_id1).context, context1)
        self.assertDictEqual(
            policy._history_storage.get_unrewarded_history(
                history_id2).context, context2)
        self.assertDictEqual(
            policy._history_storage.get_history(history_id1).rewards, rewards)
        self.assertDictEqual(
            policy._history_storage.get_unrewarded_history(
                history_id2).rewards, {})

    def test_reward_order_descending(self):
        """Rewarding the second history first leaves the first pending."""
        policy = self.policy
        context1 = {1: [1, 1], 2: [2, 2], 3: [3, 3]}
        context2 = {1: [0, 0], 2: [3, 3], 3: [6, 6]}
        history_id1, _ = policy.get_action(context1, 2)
        history_id2, recommendations2 = policy.get_action(context2)
        rewards = {recommendations2.action.id: 1.}
        policy.reward(history_id2, rewards)
        self.assertDictEqual(
            policy._history_storage.get_unrewarded_history(
                history_id1).context, context1)
        self.assertDictEqual(
            policy._history_storage.get_history(history_id2).context, context2)
        self.assertDictEqual(
            policy._history_storage.get_unrewarded_history(
                history_id1).rewards, {})
        self.assertDictEqual(
            policy._history_storage.get_history(history_id2).rewards, rewards)

    def test_update_action(self):
        """update_action() persists attribute changes to action storage."""
        action = self.actions[1]
        action.action_type = "text"
        action.action_text = "hello"
        self.policy.update_action(action)
        updated_action = self.action_storage.get(action.id)
        self.assertEqual(updated_action.action_type, action.action_type)
        self.assertEqual(updated_action.action_text, action.action_text)
def main():
    """Tune LinThompSamp's delta, R and epsilon one at a time on simulated
    data, plot the three tuning curves, then run a 10000-round regret
    analysis with the best value found for each parameter.
    """
    n_rounds = 1000
    context_dimension = 5
    action_storage = MemoryActionStorage()
    action_storage.add([Action(i) for i in range(5)])
    random_state = np.random.RandomState(0)

    # Parameter tuning: vary one parameter at a time, keeping the other
    # two fixed at delta=0.5, R=0.01, epsilon=0.5.
    tuning_region = np.arange(0.01, 0.99, 0.1)
    ctr_delta = np.zeros(shape=len(tuning_region))
    ctr_r = np.zeros(shape=len(tuning_region))
    ctr_epsilon = np.zeros(shape=len(tuning_region))

    context1, desired_actions1 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=0)

    def evaluate(delta, R, epsilon):
        """Evaluate a fresh LinThompSamp with these parameters and return
        its raw hit count (n_rounds minus final cumulative regret)."""
        policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                              action_storage,
                              context_dimension=context_dimension,
                              delta=delta, R=R, epsilon=epsilon,
                              random_state=random_state)
        cum_regret = simulation.evaluate_policy(policy, context1,
                                                desired_actions1)
        return n_rounds - cum_regret[-1]

    for param_i, param in enumerate(tuning_region):
        ctr_delta[param_i] = evaluate(delta=param, R=0.01, epsilon=0.5)
        ctr_r[param_i] = evaluate(delta=0.5, R=param, epsilon=0.5)
        ctr_epsilon[param_i] = evaluate(delta=0.5, R=0.01, epsilon=param)

    ctr_delta /= n_rounds
    ctr_r /= n_rounds
    ctr_epsilon /= n_rounds

    delta_opt = tuning_region[np.argmax(ctr_delta)]
    r_opt = tuning_region[np.argmax(ctr_r)]
    epsilon_opt = tuning_region[np.argmax(ctr_epsilon)]

    # Plot the parameter tuning result.  Reuse tuning_region as the x-axis
    # rather than recomputing the same np.arange three times.
    plt.plot(tuning_region, ctr_delta, 'ro-',
             label="delta changes, R = 0.01, eps = 0.5")
    plt.plot(tuning_region, ctr_r, 'gs-',
             label="delta = 0.5, R = changes, eps = 0.5")
    plt.plot(tuning_region, ctr_epsilon, 'b^-',
             label="delta = 0.5, R = 0.01, eps = changes")
    plt.xlabel('parameter value')
    plt.ylabel('CTR')
    plt.legend(bbox_to_anchor=(1., 0.7))
    plt.ylim([0, 1])
    plt.title("Parameter Tunning Curve - LinThompSamp")
    plt.show()

    # Regret Analysis with the tuned parameters on a longer run.
    n_rounds = 10000
    context2, desired_actions2 = simulation.simulate_data(
        n_rounds, context_dimension, action_storage, random_state=1)
    policy = LinThompSamp(MemoryHistoryStorage(), MemoryModelStorage(),
                          action_storage,
                          context_dimension=context_dimension,
                          delta=delta_opt, R=r_opt, epsilon=epsilon_opt,
                          random_state=random_state)

    for t in range(n_rounds):
        history_id, recommendation = policy.get_action(context2[t])
        action_id = recommendation.action.id
        if desired_actions2[t] != action_id:
            policy.reward(history_id, {action_id: 0})
        else:
            policy.reward(history_id, {action_id: 1})

    policy.plot_avg_regret()
    plt.show()