Example No. 1
    def test_greedy0_n3(self):

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 0, 0, 1, 1, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(3),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertListEqual(arms, [2, 2])
        self.assertEqual(mab._imp.kmeans.n_clusters, 3)
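These test examples go through the BaseTest.predict harness defined in Example No. 23. For orientation, here is a minimal standalone sketch of the same EpsilonGreedy-plus-Clusters setup through the public MAB API; the data is illustrative only and the import path assumes the mabwiser package is installed.

# Standalone sketch with illustrative data (assumes mabwiser is installed)
from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy

mab = MAB(arms=[1, 2, 3, 4],
          learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
          neighborhood_policy=NeighborhoodPolicy.Clusters(3),
          seed=123456)

# Train on historic decisions, rewards, and their contexts
mab.fit(decisions=[1, 1, 2, 3, 3, 3],
        rewards=[0, 0, 1, 1, 1, 0],
        contexts=[[0, 1], [1, 1], [0, 0], [1, 0], [0, 2], [2, 2]])

# One predicted arm per test context
print(mab.predict([[0, 1], [1, 1]]))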
Example No. 2
    def test_greedy0_k2_exps(self):

        exps, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.KNearest(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=False)

        self.assertDictEqual(exps[0], {1: 0.0, 2: 0.0, 3: 0, 4: 0})
        self.assertDictEqual(exps[1], {1: 1.0, 2: 0.0, 3: 0, 4: 0})
Example No. 3
    def test_alpha0_nearest5(self):

        arm, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 1],
            rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=0),
            neighborhood_policy=NeighborhoodPolicy.KNearest(k=5),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=3,
            is_predict=True)

        self.assertEqual(len(arm), 3)
        self.assertEqual(arm, [[3, 3], [3, 3], [3, 3]])
Example No. 4
    def test_greedy0_n2_mini(self):

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2, True),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertListEqual(arms, [3, 1])
        self.assertTrue(isinstance(mab._imp.kmeans, MiniBatchKMeans))
Example No. 5
    def test_greedy0_no_nhood_predict_random(self):

        # The 2nd and 3rd arms have bad rewards and should not be selected
        # Use a small neighborhood size to force the no-neighborhood case
        arms, mab = self.predict(arms=[1, 2, 3],
                                 decisions=[1, 1, 1, 2, 2, 2],
                                 rewards=[10, 10, 10, -10, -10, -10],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                 neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions=25),
                                 context_history=[[1, 1, 2, 3, 5], [1, 2, 1, 1, 1], [0, 0, 1, 0, 0],
                                                  [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]],
                                 contexts=[[0, -1, -2, -3, -5], [-1, -1, -1, -1, -1]],
                                 seed=7,
                                 num_run=2,
                                 is_predict=True)

        # The 3rd arm was never seen but is picked by the random no-neighborhood fallback in both runs
        self.assertListEqual(arms[0], [2, 3])
        self.assertListEqual(arms[1], [2, 1])
Example No. 6
    def test_partial_fit_indices(self):
        seed = 11
        n_dimensions = 5
        n_tables = 5
        rng = np.random.RandomState(seed)
        contexts = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
        decisions = np.array([rng.randint(0, 2) for _ in range(10)])
        rewards = np.array([rng.rand() for _ in range(10)])
        lsh = MAB(arms=[0, 1], learning_policy=LearningPolicy.Softmax(),
                  neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions, n_tables),
                  seed=seed)
        lsh.fit(decisions, rewards, contexts)
        contexts2 = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
        decisions2 = np.array([rng.randint(0, 2) for _ in range(10)])
        rewards2 = np.array([rng.rand() for _ in range(10)])
        lsh.partial_fit(decisions2, rewards2, contexts2)

        self.assertListEqual(lsh._imp.table_to_hash_to_index[0][4], [1, 15, 16])
        self.assertListEqual(lsh._imp.table_to_hash_to_index[0][12], [9, 10, 11, 19])
Example No. 7
    def test_greedy0_no_nhood_expectation_nan(self):

        # The 2nd and 3rd arms have bad rewards and should not be selected
        # Use a small neighborhood size to force the no-neighborhood case
        exps, mab = self.predict(arms=[1, 2, 3],
                                 decisions=[1, 1, 1, 2, 2, 2],
                                 rewards=[10, 10, 10, -10, -10, -10],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                 neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions=25),
                                 context_history=[[1, 1, 2, 3, 5], [1, 2, 1, 1, 1], [0, 0, 1, 0, 0],
                                                  [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]],
                                 contexts=[[0, -1, -2, -3, -5], [-1, -1, -1, -1, -1]],
                                 seed=7,
                                 num_run=1,
                                 is_predict=False)

        # When there are no neighborhoods, expectations will be nan
        self.assertDictEqual(exps[0], {1: np.nan, 2: np.nan, 3: np.nan})
        self.assertDictEqual(exps[1], {1: np.nan, 2: np.nan, 3: np.nan})
Example No. 8
    def test_partial_fit_thompson_thresholds(self):

        arm_to_threshold = {1: 1, 2: 5, 3: 2, 4: 3}

        def binarize(arm, reward):
            return reward >= arm_to_threshold[arm]

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 7, 0, 1, 9, 0, 2, 6, 11],
            learning_policy=LearningPolicy.ThompsonSampling(binarize),
            neighborhood_policy=NeighborhoodPolicy.KNearest(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertTrue(mab._imp.lp.is_contextual_binarized)
        self.assertListEqual(arms, [4, 4])
        self.assertEqual(len(mab._imp.decisions), 10)
        self.assertEqual(len(mab._imp.rewards), 10)
        self.assertEqual(len(mab._imp.contexts), 10)
        self.assertEqual(np.ndim(mab._imp.decisions), 1)
        self.assertListEqual(list(set(mab._imp.rewards)), [0, 1])

        decisions2 = [1, 2, 3]
        rewards2 = [11, 1, 6]
        context_history2 = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0]]
        mab.partial_fit(decisions2, rewards2, context_history2)

        self.assertEqual(len(mab._imp.decisions), 13)
        self.assertEqual(len(mab._imp.rewards), 13)
        self.assertEqual(len(mab._imp.contexts), 13)
        self.assertEqual(np.ndim(mab._imp.decisions), 1)
        arm = mab.predict([[0, 1, 2, 3, 5]])
        self.assertEqual(arm, 3)
        self.assertListEqual(list(set(mab._imp.rewards)), [0, 1])
Example No. 9
    def test_tables(self):
        seed = 11
        n_dimensions = 5
        n_tables = 5
        rng = np.random.RandomState(seed)
        contexts = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
        decisions = np.array([rng.randint(0, 2) for _ in range(10)])
        rewards = np.array([rng.rand() for _ in range(10)])
        lsh = MAB(arms=[0, 1], learning_policy=LearningPolicy.Softmax(),
                  neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions, n_tables),
                  seed=seed)
        for i in range(n_tables):
            self.assertListEqual([], lsh._imp.table_to_plane[i])

        lsh.fit(decisions, rewards, contexts)
        self.assertListAlmostEqual(list(lsh._imp.table_to_plane[0][0]),
                                   [1.74945474, -0.286073, -0.48456513, -2.65331856, -0.00828463])
        self.assertListEqual(list(lsh._imp.table_to_hash_to_index[0].keys()), [1, 4, 5, 12, 13, 14, 15])
        self.assertListEqual(lsh._imp.table_to_hash_to_index[0][1], [3])
        self.assertListEqual(lsh._imp.table_to_hash_to_index[0][14], [0, 4, 8])
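For intuition, LSHNearest buckets contexts by random projections: the sign bits of dot products with random hyperplanes are packed into an integer key per table. A conceptual sketch of that hashing step (illustrative only, not MABWiser's exact internals):

# Conceptual random-projection LSH bucketing (illustrative)
import numpy as np

def lsh_hash(context, planes):
    bits = (context @ planes.T) >= 0                    # one sign bit per hyperplane
    return int(bits.dot(1 << np.arange(len(planes))))   # pack the bits into a key

rng = np.random.RandomState(11)
planes = rng.standard_normal((5, 7))                    # n_dimensions=5, 7 features
print(lsh_hash(rng.rand(7), planes))                    # bucket key in [0, 31]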
Example No. 10
    def test_greedy0_no_nhood_predict_weighted(self):

        # The 2nd and 3rd arms have bad rewards and should not be selected
        # Use a small neighborhood size to force the no-neighborhood case
        arms, mab = self.predict(arms=[1, 2, 3],
                                 decisions=[1, 1, 1, 2, 2, 2],
                                 rewards=[10, 10, 10, -10, -10, -10],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                 neighborhood_policy=NeighborhoodPolicy.LSHNearest(
                                     n_dimensions=25, no_nhood_prob_of_arm=[0, 0.8, 0.2]),
                                 context_history=[[1, 1, 2, 3, 5], [1, 2, 1, 1, 1], [0, 0, 1, 0, 0],
                                                  [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]],
                                 contexts=[[0, -1, -2, -3, -5], [-1, -1, -1, -1, -1]],
                                 seed=7,
                                 num_run=2,
                                 is_predict=True)

        # The 2nd arm is weighted highly and is selected in every draw
        self.assertListEqual(arms[0], [2, 2])
        self.assertListEqual(arms[1], [2, 2])
Example No. 11
    def test_add_arm(self):

        arms, mab = self.predict(arms=[1, 2, 3, 4],
                                 decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                 rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                 neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions=2),
                                 context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                                                  [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                                                  [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                                                  [0, 2, 1, 0, 0]],
                                 contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)
        mab.add_arm(5)
        self.assertTrue(5 in mab.arms)
        self.assertTrue(5 in mab._imp.arms)
        self.assertTrue(5 in mab._imp.lp.arms)
        self.assertTrue(5 in mab._imp.lp.arm_to_expectation.keys())
Example No. 12
    def test_lints_knearest(self):

        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17, 0.33,
                           0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]
        })

        # Test data for the new prediction
        test_df = pd.DataFrame({
            'age': [37, 52],
            'click_rate': [0.5, 0.6],
            'subscriber': [0, 1]
        })

        # Scale the data
        scaler = StandardScaler()
        train = scaler.fit_transform(
            np.asarray(train_df[['age', 'click_rate', 'subscriber']],
                       dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        arms, mab = self.predict(
            arms=[1, 2, 3, 4, 5],
            decisions=train_df['ad'],
            rewards=train_df['revenues'],
            learning_policy=LearningPolicy.LinTS(alpha=1),
            neighborhood_policy=NeighborhoodPolicy.KNearest(k=4),
            context_history=train,
            contexts=test,
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertEqual(arms, [1, 2])
Example No. 13
    def test_copy_arms(self):
        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertTrue(mab.arms is mab._imp.lp_list[0].arms)
        self.assertTrue(mab.arms is mab._imp.lp_list[1].arms)
        mab.add_arm(5)
        self.assertTrue(mab.arms is mab._imp.lp_list[0].arms)
        self.assertTrue(mab.arms is mab._imp.lp_list[1].arms)
Example No. 14
    def test_add_arm(self):

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.Popularity(),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)
        mab.add_arm(5)
        self.assertTrue(5 in mab.arms)
        self.assertTrue(5 in mab._imp.arms)
        self.assertTrue(5 in mab._imp.lp_list[0].arms)
        self.assertTrue(5 in mab._imp.lp_list[0].arm_to_expectation.keys())
Example No. 15
test_df_revenue = pd.Series([7, 13])

# Scale the training and test data
scaler = StandardScaler()
train = scaler.fit_transform(train_df[['age', 'click_rate',
                                       'subscriber']].values.astype('float64'))
test = scaler.transform(test_df.values.astype('float64'))

########################################################
# Radius Neighborhood Policy with UCB1 Learning Policy
########################################################

# Radius contextual policy with radius equal to 5 and UCB1 learning with alpha 1.25
radius = MAB(arms=ads,
             learning_policy=LearningPolicy.UCB1(alpha=1.25),
             neighborhood_policy=NeighborhoodPolicy.Radius(radius=5))

# Learn from previous ads shown and revenues generated
radius.fit(decisions=train_df['ad'],
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show
prediction = radius.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = radius.predict_expectations(test)

# Results
print("Radius: ", prediction, " ", expectations)
assert (prediction == [4, 4])
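With multiple test contexts, predict returns a list of arms and predict_expectations returns one arm-to-expectation dictionary per context. An illustrative follow-up inspecting the expectations computed above:

# Best arm per test context (illustrative follow-up)
for context_exp in expectations:
    best_ad = max(context_exp, key=context_exp.get)
    print("best ad:", best_ad, "expected revenue:", context_exp[best_ad])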
Example No. 16
    def test_invalid_n_tables_type(self):
        with self.assertRaises(TypeError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.LSHNearest(n_tables='string'))
Example No. 17
    def test_invalid_learning_policy(self):
        with self.assertRaises(TypeError):
            MAB([0, 1], NeighborhoodPolicy.Radius(radius=12))
Example No. 18
    def test_invalid_clusters_num(self):
        with self.assertRaises(ValueError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.Clusters(n_clusters=1))
Example No. 19
    def test_invalid_k(self):
        with self.assertRaises(ValueError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.KNearest(k=0))
Example No. 20
    def test_invalid_minibatch(self):
        with self.assertRaises(TypeError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.Clusters(is_minibatch=0))
Example No. 21
    def test_invalid_radius(self):
        with self.assertRaises(ValueError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.Radius(radius=-1))
Example No. 22
    def test_invalid_radius_no_nhood_sum(self):
        with self.assertRaises(ValueError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.Radius(radius=1,
                                          no_nhood_prob_of_arm=[0, 0]))
Example No. 23
class BaseTest(unittest.TestCase):

    # A list of valid learning policies
    lps = [
        LearningPolicy.EpsilonGreedy(),
        LearningPolicy.EpsilonGreedy(epsilon=0),
        LearningPolicy.EpsilonGreedy(epsilon=0.0),
        LearningPolicy.EpsilonGreedy(epsilon=0.5),
        LearningPolicy.EpsilonGreedy(epsilon=1),
        LearningPolicy.EpsilonGreedy(epsilon=1.0),
        LearningPolicy.Random(),
        LearningPolicy.Softmax(),
        LearningPolicy.Softmax(tau=0.1),
        LearningPolicy.Softmax(tau=0.5),
        LearningPolicy.Softmax(tau=1),
        LearningPolicy.Softmax(tau=1.0),
        LearningPolicy.Softmax(tau=5.0),
        LearningPolicy.ThompsonSampling(),
        LearningPolicy.UCB1(),
        LearningPolicy.UCB1(alpha=0),
        LearningPolicy.UCB1(alpha=0.0),
        LearningPolicy.UCB1(alpha=0.5),
        LearningPolicy.UCB1(alpha=1),
        LearningPolicy.UCB1(alpha=1.0),
        LearningPolicy.UCB1(alpha=5)
    ]

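    # A list of valid parametric learning policies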
    para_lps = [
        LearningPolicy.LinTS(alpha=0.00001, l2_lambda=1),
        LearningPolicy.LinTS(alpha=0.5, l2_lambda=1),
        LearningPolicy.LinTS(alpha=1, l2_lambda=1),
        LearningPolicy.LinTS(alpha=0.00001, l2_lambda=0.5),
        LearningPolicy.LinTS(alpha=0.5, l2_lambda=0.5),
        LearningPolicy.LinTS(alpha=1, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=0, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=0.5, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=1, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=0, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=0.5, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=1, l2_lambda=0.5)
    ]

    # A list of valid neighborhood policies
    nps = [
        NeighborhoodPolicy.KNearest(),
        NeighborhoodPolicy.KNearest(k=1),
        NeighborhoodPolicy.KNearest(k=3),
        NeighborhoodPolicy.Radius(),
        NeighborhoodPolicy.Radius(2.5),
        NeighborhoodPolicy.Radius(5)
    ]

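    # A list of valid clustering neighborhood policies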
    cps = [
        NeighborhoodPolicy.Clusters(),
        NeighborhoodPolicy.Clusters(n_clusters=3),
        NeighborhoodPolicy.Clusters(is_minibatch=True),
        NeighborhoodPolicy.Clusters(n_clusters=3, is_minibatch=True)
    ]

    @staticmethod
    def predict(
        arms: List[Arm],
        decisions: Union[List, np.ndarray, pd.Series],
        rewards: Union[List, np.ndarray, pd.Series],
        learning_policy: Union[LearningPolicy.EpsilonGreedy,
                               LearningPolicy.Random, LearningPolicy.Softmax,
                               LearningPolicy.ThompsonSampling,
                               LearningPolicy.UCB1, LearningPolicy.LinTS,
                               LearningPolicy.LinUCB],
        neighborhood_policy: Union[None, NeighborhoodPolicy.Clusters,
                                   NeighborhoodPolicy.KNearest,
                                   NeighborhoodPolicy.LSHNearest,
                                   NeighborhoodPolicy.Radius] = None,
        context_history: Union[None, List[Num], List[List[Num]], np.ndarray,
                               pd.DataFrame, pd.Series] = None,
        contexts: Union[None, List[Num], List[List[Num]], np.ndarray,
                        pd.DataFrame, pd.Series] = None,
        seed: Optional[int] = 123456,
        num_run: Optional[int] = 1,
        is_predict: Optional[bool] = True,
        n_jobs: Optional[int] = 1,
        backend: Optional[str] = None
    ) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
        """Sets up a MAB model and runs the given configuration.

        Return list of predictions or prediction and the mab instance, when is_predict is true
        Return list of expectations or expectation and the mab instance, when is predict is false

        Calls the predict or predict_expectation method num_run number of times.
        """

        # Model
        mab = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs,
                  backend)

        # Train
        mab.fit(decisions, rewards, context_history)

        # Test
        if is_predict:

            # Return: prediction(s) and the MAB instance
            predictions = [mab.predict(contexts) for _ in range(num_run)]
            return predictions[0] if num_run == 1 else predictions, mab

        else:

            # Return: expectations(s) and the MAB instance
            expectations = [
                mab.predict_expectations(contexts) for _ in range(num_run)
            ]
            return expectations[0] if num_run == 1 else expectations, mab

    def assertListAlmostEqual(self, list1, list2):
        """
        Asserts that the floating-point values in the given lists are (almost) equal to each other
        """
        if not isinstance(list1, list):
            list1 = list(list1)

        if not isinstance(list2, list):
            list2 = list(list2)

        self.assertEqual(len(list1), len(list2))

        for index, val in enumerate(list1):
            self.assertAlmostEqual(val, list2[index])
Example No. 24
    def test_invalid_metric(self):
        with self.assertRaises(ValueError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.Radius(metric='linear'))
Example No. 25
####################################
# Different Bandits for Simulation
####################################

print('Starting simulation 1\n')


def binarize(decision, reward):

    if decision == 0:
        return reward <= 50
    else:
        return reward >= 220

n_jobs = 2
contextual_mabs = [('Random', MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
                   ('UCB1', MAB([0, 1], LearningPolicy.UCB1(1), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
                   ('ThompsonSampling', MAB([0, 1], LearningPolicy.ThompsonSampling(binarize),
                                            NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
                   ('EpsilonGreedy', MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=.15),
                                         NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
                   ('Softmax', MAB([0, 1], LearningPolicy.Softmax(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs))]

context_free_mabs = [('Random', MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs)),
                     ('UCB1', MAB([0, 1], LearningPolicy.UCB1(1), n_jobs=n_jobs)),
                     ('ThompsonSampling', MAB([0, 1], LearningPolicy.ThompsonSampling(binarize), n_jobs=n_jobs)),
                     ('EpsilonGreedy', MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=.15), n_jobs=n_jobs)),
                     ('Softmax', MAB([0, 1], LearningPolicy.Softmax(), n_jobs=n_jobs))]

mixed = [('RandomRadius', MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
         ('Random', MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs))]
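These bandit lists are typically handed to MABWiser's Simulator to compare the policies on held-out data. A minimal sketch, assuming the Simulator API and synthetic decisions, rewards, and contexts in place of the dataset the full script loads:

# Simulator sketch with synthetic data (illustrative only)
import numpy as np
from sklearn.preprocessing import StandardScaler
from mabwiser.simulator import Simulator

rng = np.random.RandomState(123456)
decisions = rng.randint(0, 2, 200)      # arms 0 and 1, matching the MABs above
rewards = rng.randint(0, 250, 200)
contexts = rng.rand(200, 5)

sim = Simulator(contextual_mabs, decisions, rewards, contexts,
                scaler=StandardScaler(), test_size=0.5, batch_size=0,
                seed=123456)
sim.run()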
Example No. 26
    def test_invalid_radius_no_nhood_type_ann(self):
        with self.assertRaises(TypeError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.LSHNearest(no_nhood_prob_of_arm={}))
Example No. 27
    def test_invalid_n_dimensions_value(self):
        with self.assertRaises(ValueError):
            MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=0),
                NeighborhoodPolicy.LSHNearest(n_dimensions=0))
Example No. 28
# Online update of the model
linucb.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)

# Update the model with a new arm
linucb.add_arm(6)

###################################################################
# LinUCB Learning Policy combined with Radius Neighborhood Policy
###################################################################

# Radius contextual policy with radius equal to 1 and LinUCB learning with alpha 1
radius = MAB(arms=ads,
             learning_policy=LearningPolicy.LinUCB(alpha=1),
             neighborhood_policy=NeighborhoodPolicy.Radius(radius=1))

# Learn from previous ads shown and revenues generated
radius.fit(decisions=train_df['ad'],
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show
prediction = radius.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = radius.predict_expectations(test)

# Results
print("Radius: ", prediction, " ", expectations)
assert (prediction == [1, 2])
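The same predict / partial_fit pair supports an online loop: predict an ad, observe its revenue, update the model. A hedged sketch continuing from the radius model above, with hypothetical (already scaled) events:

# Online-serving sketch; the (context, revenue) events are hypothetical
events = [([0.1, 0.5, 1.0], 12.0), ([0.9, 0.2, 0.0], 3.0)]
for context, revenue in events:
    ad = radius.predict([context])  # single context -> single predicted arm
    radius.partial_fit(decisions=[ad], rewards=[revenue], contexts=[context])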