def test_invalid_ridge_l2_lambda_type(self):
    """Passing l2_lambda=None to either linear policy must raise TypeError.

    Both LinUCB and LinTS share the same constructor shape, so the same
    invalid configuration is exercised for each in turn.
    """
    for policy_type in (LearningPolicy.LinUCB, LearningPolicy.LinTS):
        with self.assertRaises(TypeError):
            # Policy is built inside the context manager: the TypeError may
            # surface during validation of the configuration.
            self.predict(arms=[1, 2, 3],
                         decisions=[1, 1, 1],
                         rewards=[0, 0, 0],
                         learning_policy=policy_type(alpha=1, l2_lambda=None),
                         neighborhood_policy=NeighborhoodPolicy.KNearest(2),
                         context_history=np.array([1, 1, 1]),
                         contexts=np.array([[1, 1]]),
                         seed=123456,
                         num_run=1,
                         is_predict=True)
def test_lints(self):
    """End-to-end LinTS flow: fit, predict, partial_fit, then add a new arm."""
    # Historical data: which ad was shown, the revenue it produced,
    # and the user context at the time.
    history_df = pd.DataFrame({
        'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
        'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
        'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
        'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17, 0.33,
                       0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
        'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]
    })

    # Contexts of two new users to predict for, plus the revenue they
    # later generate (used below for online updating).
    new_users_df = pd.DataFrame({
        'age': [37, 52],
        'click_rate': [0.5, 0.6],
        'subscriber': [0, 1]
    })
    new_revenues = pd.Series([7, 13])

    # Standardize contexts; the scaler is fit on history only.
    scaler = StandardScaler()
    train = scaler.fit_transform(
        np.asarray(history_df[['age', 'click_rate', 'subscriber']],
                   dtype='float64'))
    test = scaler.transform(np.asarray(new_users_df, dtype='float64'))

    arms, mab = self.predict(
        arms=[1, 2, 3, 4, 5],
        decisions=history_df['ad'],
        rewards=history_df['revenues'],
        learning_policy=LearningPolicy.LinTS(alpha=1.5),
        context_history=train,
        contexts=test,
        seed=123456,
        num_run=1,
        is_predict=True)

    # Deterministic given the fixed seed.
    self.assertEqual(arms, [5, 2])

    # Online update with the observed outcomes, then extend the arm set.
    mab.partial_fit(decisions=arms, rewards=new_revenues, contexts=test)
    mab.add_arm(6)
    self.assertIn(6, mab.arms)
    self.assertIn(6, mab._imp.arm_to_expectation.keys())
def test_invalid_lp_arg(self):
    """Unknown keyword arguments to learning policies must raise TypeError."""
    # Each entry pairs a policy type with a keyword it does not accept.
    bad_configurations = [
        (LearningPolicy.UCB1, {'epsilon': 2}),
        (LearningPolicy.EpsilonGreedy, {'alpha': 2}),
        (LearningPolicy.ThompsonSampling, {'alpha': 2}),
        (LearningPolicy.Softmax, {'alpha': 2}),
        (LearningPolicy.LinUCB, {'tau': 1}),
        (LearningPolicy.LinTS, {'epsilon': 1}),
    ]
    for policy_type, kwargs in bad_configurations:
        with self.assertRaises(TypeError):
            # Construction happens inside the context manager because the
            # unexpected keyword raises at policy creation time.
            MAB(['a', 'b'], policy_type(**kwargs))
def test_lints_knearest(self):
    """LinTS combined with a KNearest(k=4) neighborhood policy."""
    # Historical ad impressions with observed revenue and user context.
    history_df = pd.DataFrame({
        'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
        'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
        'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
        'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17, 0.33,
                       0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
        'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]
    })

    # Two fresh user contexts to predict for.
    new_users_df = pd.DataFrame({
        'age': [37, 52],
        'click_rate': [0.5, 0.6],
        'subscriber': [0, 1]
    })

    # Standardize contexts; the scaler is fit on history only.
    scaler = StandardScaler()
    train = scaler.fit_transform(
        np.asarray(history_df[['age', 'click_rate', 'subscriber']],
                   dtype='float64'))
    test = scaler.transform(np.asarray(new_users_df, dtype='float64'))

    arms, mab = self.predict(
        arms=[1, 2, 3, 4, 5],
        decisions=history_df['ad'],
        rewards=history_df['revenues'],
        learning_policy=LearningPolicy.LinTS(alpha=1),
        neighborhood_policy=NeighborhoodPolicy.KNearest(k=4),
        context_history=train,
        contexts=test,
        seed=123456,
        num_run=1,
        is_predict=True)

    # Deterministic given the fixed seed.
    self.assertEqual(arms, [1, 2])
class BaseTest(unittest.TestCase):
    """Shared fixtures and helpers for the MAB test suite."""

    # Valid context-free learning policies, covering boundary parameter values.
    lps = [
        LearningPolicy.EpsilonGreedy(),
        LearningPolicy.EpsilonGreedy(epsilon=0),
        LearningPolicy.EpsilonGreedy(epsilon=0.0),
        LearningPolicy.EpsilonGreedy(epsilon=0.5),
        LearningPolicy.EpsilonGreedy(epsilon=1),
        LearningPolicy.EpsilonGreedy(epsilon=1.0),
        LearningPolicy.Random(),
        LearningPolicy.Softmax(),
        LearningPolicy.Softmax(tau=0.1),
        LearningPolicy.Softmax(tau=0.5),
        LearningPolicy.Softmax(tau=1),
        LearningPolicy.Softmax(tau=1.0),
        LearningPolicy.Softmax(tau=5.0),
        LearningPolicy.ThompsonSampling(),
        LearningPolicy.UCB1(),
        LearningPolicy.UCB1(alpha=0),
        LearningPolicy.UCB1(alpha=0.0),
        LearningPolicy.UCB1(alpha=0.5),
        LearningPolicy.UCB1(alpha=1),
        LearningPolicy.UCB1(alpha=1.0),
        LearningPolicy.UCB1(alpha=5)
    ]

    # Valid parametric (linear) learning policies.
    para_lps = [
        LearningPolicy.LinTS(alpha=0.00001, l2_lambda=1),
        LearningPolicy.LinTS(alpha=0.5, l2_lambda=1),
        LearningPolicy.LinTS(alpha=1, l2_lambda=1),
        LearningPolicy.LinTS(alpha=0.00001, l2_lambda=0.5),
        LearningPolicy.LinTS(alpha=0.5, l2_lambda=0.5),
        LearningPolicy.LinTS(alpha=1, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=0, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=0.5, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=1, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=0, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=0.5, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=1, l2_lambda=0.5)
    ]

    # Valid neighborhood policies.
    nps = [
        NeighborhoodPolicy.KNearest(),
        NeighborhoodPolicy.KNearest(k=1),
        NeighborhoodPolicy.KNearest(k=3),
        NeighborhoodPolicy.Radius(),
        NeighborhoodPolicy.Radius(2.5),
        NeighborhoodPolicy.Radius(5)
    ]

    # Valid clustering policies.
    cps = [
        NeighborhoodPolicy.Clusters(),
        NeighborhoodPolicy.Clusters(n_clusters=3),
        NeighborhoodPolicy.Clusters(is_minibatch=True),
        NeighborhoodPolicy.Clusters(n_clusters=3, is_minibatch=True)
    ]

    @staticmethod
    def predict(
            arms: List[Arm],
            decisions: Union[List, np.ndarray, pd.Series],
            rewards: Union[List, np.ndarray, pd.Series],
            learning_policy: Union[LearningPolicy.EpsilonGreedy,
                                   LearningPolicy.Random,
                                   LearningPolicy.Softmax,
                                   LearningPolicy.ThompsonSampling,
                                   LearningPolicy.UCB1,
                                   LearningPolicy.LinTS,
                                   LearningPolicy.LinUCB],
            neighborhood_policy: Union[None, NeighborhoodPolicy.Clusters,
                                       NeighborhoodPolicy.Radius,
                                       NeighborhoodPolicy.KNearest] = None,
            context_history: Union[None, List[Num], List[List[Num]],
                                   np.ndarray, pd.DataFrame, pd.Series] = None,
            contexts: Union[None, List[Num], List[List[Num]],
                            np.ndarray, pd.DataFrame, pd.Series] = None,
            seed: Optional[int] = 123456,
            num_run: Optional[int] = 1,
            is_predict: Optional[bool] = True,
            n_jobs: Optional[int] = 1,
            backend: Optional[str] = None
    ) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
        """Build, fit, and exercise a MAB model with the given configuration.

        Runs predict (when is_predict is True) or predict_expectations
        (otherwise) num_run times. Returns a 2-tuple of (results, mab) where
        results is the single result when num_run == 1, and the full list of
        results otherwise.
        """
        # Build and train the model.
        mab = MAB(arms, learning_policy, neighborhood_policy, seed,
                  n_jobs, backend)
        mab.fit(decisions, rewards, context_history)

        # Dispatch once on the requested mode, then run num_run times.
        runner = mab.predict if is_predict else mab.predict_expectations
        results = [runner(contexts) for _ in range(num_run)]

        if num_run == 1:
            return results[0], mab
        return results, mab

    def assertListAlmostEqual(self, list1, list2):
        """Assert that two sequences of floats are element-wise almost equal."""
        seq1 = list1 if isinstance(list1, list) else list(list1)
        seq2 = list2 if isinstance(list2, list) else list(list2)

        # Equal length first, so zip below cannot silently truncate.
        self.assertEqual(len(seq1), len(seq2))
        for left, right in zip(seq1, seq2):
            self.assertAlmostEqual(left, right)
prediction = knearest.predict(test) # Expectation of each ad based on learning from past ad revenues expectations = knearest.predict_expectations(test) # Results print("KNearest: ", prediction, " ", expectations) assert (prediction == [1, 2]) ################################################## # Linear Thompson Sampling Learning Policy ################################################## # LinTS learning policy with alpha 1.25 and l2_lambda 1 lints = MAB(arms=ads, learning_policy=LearningPolicy.LinTS(alpha=1.5, l2_lambda=1)) # Learn from previous ads shown and revenues generated lints.fit(decisions=train_df['ad'], rewards=train_df['revenues'], contexts=train) # Predict the next best ad to show prediction = lints.predict(test) # Expectation of each ad based on learning from past ad revenues expectations = lints.predict_expectations(test) # Results print("LinTS: ", prediction, " ", expectations) assert (prediction == [5, 2])