Example 1
    def test_UCB1_c2_backend(self):
        rng = np.random.RandomState(seed=111)
        contexts_history = rng.randint(0, 5, (10, 5))
        contexts = rng.randint(0, 5, (10, 5))

        arm, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 4],
            rewards=[1, 0, 0, 1, 0, 0, 1, 0, 0, 0],
            learning_policy=LearningPolicy.UCB1(alpha=0.1),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=contexts_history,
            contexts=contexts,
            seed=123456,
            num_run=5,
            is_predict=True,
            n_jobs=2,
            backend=None)

        self.assertEqual(arm,
                         [[3, 3, 1, 1, 1, 1, 3, 1, 3, 3] for _ in range(5)])

        arm, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 4],
            rewards=[1, 0, 0, 1, 0, 0, 1, 0, 0, 0],
            learning_policy=LearningPolicy.UCB1(alpha=0.1),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=contexts_history,
            contexts=contexts,
            seed=123456,
            num_run=5,
            is_predict=True,
            n_jobs=2,
            backend='loky')

        self.assertEqual(arm,
                         [[3, 3, 1, 1, 1, 1, 3, 1, 3, 3] for _ in range(5)])

        arm, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 4],
            rewards=[1, 0, 0, 1, 0, 0, 1, 0, 0, 0],
            learning_policy=LearningPolicy.UCB1(alpha=0.1),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=contexts_history,
            contexts=contexts,
            seed=123456,
            num_run=5,
            is_predict=True,
            n_jobs=2,
            backend='threading')

        self.assertEqual(arm,
                         [[3, 3, 1, 1, 1, 1, 3, 1, 3, 3] for _ in range(5)])
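For reference outside the test harness, the same three backend settings (None, 'loky', 'threading') can be exercised on a plain MAB; n_jobs=-1 would use all available cores. A minimal sketch, assuming mabwiser's public API and made-up data:

import numpy as np
from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy

rng = np.random.RandomState(seed=111)
context_history = rng.randint(0, 5, (10, 5))
contexts = rng.randint(0, 5, (10, 5))

for backend in (None, 'loky', 'threading'):
    # Same UCB1 + Clusters configuration as the test above, parallelized over 2 jobs
    mab = MAB(arms=[1, 2, 3, 4],
              learning_policy=LearningPolicy.UCB1(alpha=0.1),
              neighborhood_policy=NeighborhoodPolicy.Clusters(2),
              seed=123456, n_jobs=2, backend=backend)
    mab.fit(decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 4],
            rewards=[1, 0, 0, 1, 0, 0, 1, 0, 0, 0],
            contexts=context_history)
    print(backend, mab.predict(contexts))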
Example 2
    def test_thompson_n3(self):
        rng = np.random.RandomState(seed=7)

        arms, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
            rewards=[1, 1, 0, 0, 0, 0, 1, 1, 0, 0],
            learning_policy=LearningPolicy.ThompsonSampling(),
            neighborhood_policy=NeighborhoodPolicy.Clusters(3),
            context_history=[[rng.random_sample() for _ in range(5)]
                             for _ in range(10)],
            contexts=[[1, 1, 1, 1, 1] for _ in range(10)],
            seed=123456,
            num_run=1,
            is_predict=True,
            n_jobs=1)

        self.assertListEqual(arms, [1, 1, 1, 3, 1, 3, 1, 1, 3, 1])

        arms, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
            rewards=[1, 1, 0, 0, 0, 0, 1, 1, 0, 0],
            learning_policy=LearningPolicy.ThompsonSampling(),
            neighborhood_policy=NeighborhoodPolicy.Clusters(3),
            context_history=[[rng.random_sample() for _ in range(5)]
                             for _ in range(10)],
            contexts=[[1, 1, 1, 1, 1] for _ in range(10)],
            seed=123456,
            num_run=1,
            is_predict=True,
            n_jobs=2)

        self.assertListEqual(arms, [1, 1, 3, 1, 1, 1, 1, 2, 1, 2])

        arms, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 2, 1, 2, 1, 2, 1, 2, 1, 2],
            rewards=[1, 1, 0, 0, 0, 0, 1, 1, 0, 0],
            learning_policy=LearningPolicy.ThompsonSampling(),
            neighborhood_policy=NeighborhoodPolicy.Clusters(3),
            context_history=[[rng.random_sample() for _ in range(5)]
                             for _ in range(10)],
            contexts=[[1, 1, 1, 1, 1] for _ in range(10)],
            seed=123456,
            num_run=1,
            is_predict=True,
            n_jobs=-1)

        self.assertListEqual(arms, [2, 1, 3, 3, 3, 2, 2, 3, 2, 3])
Example 3
    def test_partial_fit_greedy0_n3(self):

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(3),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertListEqual(arms, [3, 1])
        self.assertEqual(len(mab._imp.decisions), 10)
        self.assertEqual(len(mab._imp.rewards), 10)
        self.assertEqual(len(mab._imp.contexts), 10)
        self.assertEqual(np.ndim(mab._imp.decisions), 1)

        decisions2 = [1, 2, 3]
        rewards2 = [1, 1, 1]
        context_history2 = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0]]
        mab.partial_fit(decisions2, rewards2, context_history2)

        self.assertEqual(len(mab._imp.decisions), 13)
        self.assertEqual(len(mab._imp.rewards), 13)
        self.assertEqual(len(mab._imp.contexts), 13)
        self.assertEqual(np.ndim(mab._imp.decisions), 1)
Example 4
    def test_copy(self):
        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        clusters = deepcopy(mab._imp)
        self.assertIsNot(clusters, mab._imp)
        self.assertIsInstance(clusters.lp_list[0], _EpsilonGreedy)
        self.assertIsInstance(clusters.lp_list[1], _EpsilonGreedy)
        self.assertIsInstance(clusters.kmeans, KMeans)
        self.assertIsNot(clusters.kmeans, mab._imp.kmeans)
        self.assertIsNot(clusters.lp_list[0], mab._imp.lp_list[0])
        self.assertIsNot(clusters.lp_list[1], mab._imp.lp_list[1])
        self.assertEqual(clusters.lp_list[0].epsilon,
                         mab._imp.lp_list[0].epsilon)
        self.assertEqual(clusters.lp_list[1].epsilon,
                         mab._imp.lp_list[1].epsilon)
Example 5
    def test_greedy0_n2_single(self):

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertEqual(arms, 3)
Example 6
    def test_softmax_n3(self):

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.Softmax(tau=1),
            neighborhood_policy=NeighborhoodPolicy.Clusters(3),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertListEqual(arms, [3, 3])
Example 7
    def test_greedy0_n2_mini(self):

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2, True),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertListEqual(arms, [3, 1])
        self.assertTrue(isinstance(mab._imp.kmeans, MiniBatchKMeans))
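The positional Clusters(2, True) above turns on minibatch clustering; written with keywords (matching the cps list in the BaseTest excerpt in Example 14), the same policy reads:

# Equivalent keyword form; with is_minibatch=True the Clusters policy is
# backed by sklearn's MiniBatchKMeans instead of KMeans.
minibatch_clusters = NeighborhoodPolicy.Clusters(n_clusters=2, is_minibatch=True)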
Example 8
    def test_copy_arms(self):
        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertTrue(mab.arms is mab._imp.lp_list[0].arms)
        self.assertTrue(mab.arms is mab._imp.lp_list[1].arms)
        mab.add_arm(5)
        self.assertTrue(mab.arms is mab._imp.lp_list[0].arms)
        self.assertTrue(mab.arms is mab._imp.lp_list[1].arms)
Example 9
    def test_add_arm(self):

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.Popularity(),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)
        mab.add_arm(5)
        self.assertTrue(5 in mab.arms)
        self.assertTrue(5 in mab._imp.arms)
        self.assertTrue(5 in mab._imp.lp_list[0].arms)
        self.assertTrue(5 in mab._imp.lp_list[0].arm_to_expectation.keys())
Example 10
    def test_fit_twice_thompson_thresholds(self):

        arm_to_threshold = {1: 1, 2: 5, 3: 2, 4: 3}

        def binarize(arm, reward):
            return reward >= arm_to_threshold[arm]

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 7, 0, 1, 9, 0, 2, 6, 11],
            learning_policy=LearningPolicy.ThompsonSampling(binarize),
            neighborhood_policy=NeighborhoodPolicy.Clusters(3),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertTrue(mab._imp.lp_list[0].is_contextual_binarized)
        self.assertListEqual(arms, [3, 4])
        self.assertEqual(len(mab._imp.decisions), 10)
        self.assertEqual(len(mab._imp.rewards), 10)
        self.assertEqual(len(mab._imp.contexts), 10)
        self.assertEqual(np.ndim(mab._imp.decisions), 1)
        self.assertListEqual(list(set(mab._imp.rewards)), [0, 1])

        decisions2 = [1, 2, 3]
        rewards2 = [11, 1, 6]
        context_history2 = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0]]
        mab.fit(decisions2, rewards2, context_history2)

        self.assertEqual(len(mab._imp.decisions), 3)
        self.assertEqual(len(mab._imp.rewards), 3)
        self.assertEqual(len(mab._imp.contexts), 3)
        self.assertEqual(np.ndim(mab._imp.decisions), 1)
        self.assertListEqual(list(set(mab._imp.rewards)), [0, 1])
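Note the contrast with Example 3: partial_fit there grows the stored history from 10 to 13 samples, whereas fit here retrains from scratch and leaves only the 3 new samples. A minimal sketch of the same contrast on a standalone MAB, using made-up data and the public mabwiser API:

from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy

mab = MAB([1, 2], LearningPolicy.EpsilonGreedy(epsilon=0),
          NeighborhoodPolicy.Clusters(2))

# Initial training set of four samples
mab.fit(decisions=[1, 1, 2, 2], rewards=[0, 1, 0, 1],
        contexts=[[0, 1], [1, 1], [0, 0], [1, 0]])

# partial_fit appends: the stored history grows to six samples
mab.partial_fit(decisions=[1, 2], rewards=[1, 0], contexts=[[0, 1], [1, 0]])

# fit replaces: the stored history is reset to these four samples
mab.fit(decisions=[1, 1, 2, 2], rewards=[1, 0, 1, 0],
        contexts=[[0, 0], [0, 1], [1, 0], [1, 1]])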
Example 11
    def test_greedy0_n2_exps(self):

        exps, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=False)

        self.assertDictEqual(exps[0], {1: 0.0, 2: 0.0, 3: 0.5, 4: 0})
        self.assertDictEqual(exps[1], {
            1: 1.0,
            2: 0.0,
            3: 0.6666666666666666,
            4: 0
        })
Example 12
    def test_invalid_clusters_num(self):
        with self.assertRaises(ValueError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.Clusters(n_clusters=1))
Example 13
    def test_invalid_minibatch(self):
        with self.assertRaises(TypeError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.Clusters(is_minibatch=0))
Example 14
import unittest
from typing import List, Optional, Union

import numpy as np
import pandas as pd

from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy
from mabwiser.utils import Arm, Num  # type aliases; assumed to live in mabwiser.utils


class BaseTest(unittest.TestCase):

    # A list of valid learning policies
    lps = [
        LearningPolicy.EpsilonGreedy(),
        LearningPolicy.EpsilonGreedy(epsilon=0),
        LearningPolicy.EpsilonGreedy(epsilon=0.0),
        LearningPolicy.EpsilonGreedy(epsilon=0.5),
        LearningPolicy.EpsilonGreedy(epsilon=1),
        LearningPolicy.EpsilonGreedy(epsilon=1.0),
        LearningPolicy.Random(),
        LearningPolicy.Softmax(),
        LearningPolicy.Softmax(tau=0.1),
        LearningPolicy.Softmax(tau=0.5),
        LearningPolicy.Softmax(tau=1),
        LearningPolicy.Softmax(tau=1.0),
        LearningPolicy.Softmax(tau=5.0),
        LearningPolicy.ThompsonSampling(),
        LearningPolicy.UCB1(),
        LearningPolicy.UCB1(alpha=0),
        LearningPolicy.UCB1(alpha=0.0),
        LearningPolicy.UCB1(alpha=0.5),
        LearningPolicy.UCB1(alpha=1),
        LearningPolicy.UCB1(alpha=1.0),
        LearningPolicy.UCB1(alpha=5)
    ]

    # A list of valid parametric learning policies
    para_lps = [
        LearningPolicy.LinTS(alpha=0.00001, l2_lambda=1),
        LearningPolicy.LinTS(alpha=0.5, l2_lambda=1),
        LearningPolicy.LinTS(alpha=1, l2_lambda=1),
        LearningPolicy.LinTS(alpha=0.00001, l2_lambda=0.5),
        LearningPolicy.LinTS(alpha=0.5, l2_lambda=0.5),
        LearningPolicy.LinTS(alpha=1, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=0, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=0.5, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=1, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=0, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=0.5, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=1, l2_lambda=0.5)
    ]

    # A list of valid context policies
    nps = [
        NeighborhoodPolicy.KNearest(),
        NeighborhoodPolicy.KNearest(k=1),
        NeighborhoodPolicy.KNearest(k=3),
        NeighborhoodPolicy.Radius(),
        NeighborhoodPolicy.Radius(2.5),
        NeighborhoodPolicy.Radius(5)
    ]

    # A list of valid clustering neighborhood policies
    cps = [
        NeighborhoodPolicy.Clusters(),
        NeighborhoodPolicy.Clusters(n_clusters=3),
        NeighborhoodPolicy.Clusters(is_minibatch=True),
        NeighborhoodPolicy.Clusters(n_clusters=3, is_minibatch=True)
    ]

    @staticmethod
    def predict(
        arms: List[Arm],
        decisions: Union[List, np.ndarray, pd.Series],
        rewards: Union[List, np.ndarray, pd.Series],
        learning_policy: Union[LearningPolicy.EpsilonGreedy,
                               LearningPolicy.Random, LearningPolicy.Softmax,
                               LearningPolicy.ThompsonSampling,
                               LearningPolicy.UCB1, LearningPolicy.LinTS,
                               LearningPolicy.LinUCB],
        neighborhood_policy: Union[None, NeighborhoodPolicy.Clusters,
                                   NeighborhoodPolicy.Radius,
                                   NeighborhoodPolicy.KNearest] = None,
        context_history: Union[None, List[Num], List[List[Num]], np.ndarray,
                               pd.DataFrame, pd.Series] = None,
        contexts: Union[None, List[Num], List[List[Num]], np.ndarray,
                        pd.DataFrame, pd.Series] = None,
        seed: Optional[int] = 123456,
        num_run: Optional[int] = 1,
        is_predict: Optional[bool] = True,
        n_jobs: Optional[int] = 1,
        backend: Optional[str] = None
    ) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
        """Sets up a MAB model and runs the given configuration.

        Return list of predictions or prediction and the mab instance, when is_predict is true
        Return list of expectations or expectation and the mab instance, when is predict is false

        Calls the predict or predict_expectation method num_run number of times.
        """

        # Model
        mab = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs,
                  backend)

        # Train
        mab.fit(decisions, rewards, context_history)

        # Test
        if is_predict:

            # Return: prediction(s) and the MAB instance
            predictions = [mab.predict(contexts) for _ in range(num_run)]
            return predictions[0] if num_run == 1 else predictions, mab

        else:

            # Return: expectation(s) and the MAB instance
            expectations = [
                mab.predict_expectations(contexts) for _ in range(num_run)
            ]
            return expectations[0] if num_run == 1 else expectations, mab

    def assertListAlmostEqual(self, list1, list2):
        """
        Asserts that the floating-point values in the given lists are (almost) equal to each other.
        """
        if not isinstance(list1, list):
            list1 = list(list1)

        if not isinstance(list2, list):
            list2 = list(list2)

        self.assertEqual(len(list1), len(list2))

        for index, val in enumerate(list1):
            self.assertAlmostEqual(val, list2[index])
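Because predict is a staticmethod, the helper can also be exercised outside the unittest runner; a minimal sketch reusing the single-context data from Example 5, assuming the mabwiser imports shown at the top of this excerpt:

arm, mab = BaseTest.predict(
    arms=[1, 2, 3, 4],
    decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
    rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
    learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
    neighborhood_policy=NeighborhoodPolicy.Clusters(2),
    context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                     [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                     [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                     [0, 2, 1, 0, 0]],
    contexts=[[0, 1, 2, 3, 5]])
print(arm, mab.arms)  # a single predicted arm and the arm list [1, 2, 3, 4]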
Example 15
# Online update of the model (knearest, prediction, test_df_revenue and test are defined earlier in this example script)
knearest.partial_fit(decisions=prediction,
                     rewards=test_df_revenue,
                     contexts=test)

# Update the model with a new arm
knearest.add_arm(6)

########################################################
# KMeans Neighborhood Policy with UCB1 Learning Policy
########################################################

# KMeans clustering context policy with 4 clusters and UCB1 learning with alpha 1.25
clusters = MAB(arms=ads,
               learning_policy=LearningPolicy.UCB1(alpha=1.25),
               neighborhood_policy=NeighborhoodPolicy.Clusters(n_clusters=4))

# Learn from previous ads shown and revenues generated
clusters.fit(decisions=train_df['ad'],
             rewards=train_df['revenues'],
             contexts=train)

# Predict the next best ad to show
prediction = clusters.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = clusters.predict_expectations(test)

# Results
print("KMeans: ", prediction, " ", expectations)
assert (prediction == [5, 2])
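This snippet is an excerpt from a longer example script: knearest, prediction, test_df_revenue, ads, train_df, train, and test are all defined earlier in that script. As a rough, hypothetical stand-in, the Clusters part above can be run against data shaped like the following (the values are made up, so the final assertion on [5, 2] only holds for the original script's data):

import pandas as pd
from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy

# Hypothetical ad campaign data: which ad was shown, the revenue it generated,
# and two context features describing the user
ads = [1, 2, 3, 4, 5]
train_df = pd.DataFrame({'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
                         'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
                         'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
                         'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                                        0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83]})
train = train_df[['age', 'click_rate']]                             # training contexts
test = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6]})    # two new users to score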