def predict(
        arms: List[Arm],
        decisions: Union[List, np.ndarray, pd.Series],
        rewards: Union[List, np.ndarray, pd.Series],
        learning_policy: Union[LearningPolicy.EpsilonGreedy, LearningPolicy.Random, LearningPolicy.Softmax,
                               LearningPolicy.ThompsonSampling, LearningPolicy.UCB1, LearningPolicy.LinTS,
                               LearningPolicy.LinUCB],
        neighborhood_policy: Union[None, NeighborhoodPolicy.Clusters, NeighborhoodPolicy.Radius,
                                   NeighborhoodPolicy.KNearest] = None,
        context_history: Union[None, List[Num], List[List[Num]], np.ndarray, pd.DataFrame, pd.Series] = None,
        contexts: Union[None, List[Num], List[List[Num]], np.ndarray, pd.DataFrame, pd.Series] = None,
        seed: Optional[int] = 123456,
        num_run: Optional[int] = 1,
        is_predict: Optional[bool] = True,
        n_jobs: Optional[int] = 1,
        backend: Optional[str] = None) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
    """Build a MAB for the given configuration, fit it, and query it.

    When ``is_predict`` is True, calls ``predict`` ``num_run`` times and
    returns ``(prediction, mab)`` for a single run, or
    ``(list of predictions, mab)`` otherwise.  When ``is_predict`` is False,
    does the same with ``predict_expectations``.
    """
    # Build and train the model on the historical data.
    model = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs, backend)
    model.fit(decisions, rewards, context_history)

    # Query the model num_run times with the test contexts.
    if is_predict:
        results = [model.predict(contexts) for _ in range(num_run)]
    else:
        results = [model.predict_expectations(contexts) for _ in range(num_run)]

    # Unwrap the single-run case so callers get a scalar result, not a list.
    return (results[0], model) if num_run == 1 else (results, model)
def test_popularity(self):
    """Popularity policy should favor the arm with the higher mean reward."""
    arms = ['Arm1', 'Arm2']
    history_decisions = ['Arm1', 'Arm1', 'Arm2', 'Arm1']
    history_rewards = [20, 17, 25, 9]

    mab = MAB(arms, LearningPolicy.Popularity())
    mab.fit(history_decisions, history_rewards)

    # Kept from the original test: the result of this first predict() is
    # discarded, but the call itself is preserved in case it affects the
    # model's internal random state.
    mab.predict()

    self.assertEqual("Arm2", mab.predict())
    expected = {'Arm1': 0.38016528925619836,
                'Arm2': 0.6198347107438016}
    self.assertDictEqual(expected, mab.predict_expectations())
def test_predict_with_no_fit(self):
    """Calling predict_expectations before fit must raise for every policy."""
    # Context-free learning policies: no context argument.
    for lp in InvalidTest.lps:
        mab = MAB([1, 2], lp)
        with self.assertRaises(Exception):
            mab.predict_expectations()

    # Parametric learning policies: a context row is supplied.
    for lp in InvalidTest.para_lps:
        mab = MAB([1, 2], lp)
        with self.assertRaises(Exception):
            mab.predict_expectations([[0, 1, 1, 2]])

    # Every neighborhood policy paired with every context-free learning policy.
    for cp in InvalidTest.nps:
        for lp in InvalidTest.lps:
            mab = MAB([1, 2], lp, cp)
            with self.assertRaises(Exception):
                mab.predict_expectations([[0, 1, 1, 2]])
def test_exps_without_fit(self):
    """predict_expectations() before fit() must raise for every policy combination."""
    # Context-free learning policies.
    for lp in BaseTest.lps:
        with self.assertRaises(Exception):
            mab = MAB([1, 2], lp)
            mab.predict_expectations()

    # Parametric learning policies require contexts.
    for para_lp in BaseTest.para_lps:
        with self.assertRaises(Exception):
            mab = MAB([1, 2], para_lp)
            mab.predict_expectations([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])

    # Neighborhood policies over context-free learning policies.
    for cp in BaseTest.nps:
        for lp in BaseTest.lps:
            with self.assertRaises(Exception):
                mab = MAB([1, 2], lp, cp)
                mab.predict_expectations([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])

    # Neighborhood policies over parametric learning policies.
    # FIX: the original iterated BaseTest.lps here despite naming the loop
    # variable para_lp -- a copy/paste slip that duplicated the loop above
    # and left the parametric x neighborhood combinations untested.
    for cp in BaseTest.nps:
        for para_lp in BaseTest.para_lps:
            with self.assertRaises(Exception):
                mab = MAB([1, 2], para_lp, cp)
                mab.predict_expectations([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])
# Radius contextual policy with radius equals to 5 and ucb1 learning with alpha 1.25 radius = MAB(arms=ads, learning_policy=LearningPolicy.UCB1(alpha=1.25), neighborhood_policy=NeighborhoodPolicy.Radius(radius=5)) # Learn from previous ads shown and revenues generated radius.fit(decisions=train_df['ad'], rewards=train_df['revenues'], contexts=train) # Predict the next best ad to show prediction = radius.predict(test) # Expectation of each ad based on learning from past ad revenues expectations = radius.predict_expectations(test) # Results print("Radius: ", prediction, " ", expectations) assert (prediction == [4, 4]) # Online update of model radius.partial_fit(decisions=prediction, rewards=test_df_revenue, contexts=test) # Updating of the model with new arm radius.add_arm(6) ######################################################## # KNearest Neighborhood Policy with UCB1 Learning Policy
# Epsilon Greedy Learning Policy ################################### # Epsilon Greedy learning policy with random exploration set to 15% greedy = MAB(arms=options, learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15), seed=123456) # Learn from previous layouts decisions and revenues generated greedy.fit(decisions=layouts, rewards=revenues) # Predict the next best layouts decision prediction = greedy.predict() # Expected revenues of each layouts learnt from historical data based on epsilon greedy policy expectations = greedy.predict_expectations() # Results print("Epsilon Greedy: ", prediction, " ", expectations) assert (prediction == 1) # Additional historical data becomes available which allows _online learning additional_layouts = [1, 2, 1, 2] additional_revenues = [0, 12, 7, 19] # Online updating of the model greedy.partial_fit(additional_layouts, additional_revenues) # Adding a new layout option greedy.add_arm(3)
################################################## # LinUCB learning policy with alpha 1.25 and l2_lambda 1 linucb = MAB(arms=ads, learning_policy=LearningPolicy.LinUCB(alpha=1.25, l2_lambda=1)) # Learn from previous ads shown and revenues generated linucb.fit(decisions=train_df['ad'], rewards=train_df['revenues'], contexts=train) # Predict the next best ad to show prediction = linucb.predict(test) # Expectation of each ad based on learning from past ad revenues expectations = linucb.predict_expectations(test) # Results print("LinUCB: ", prediction, " ", expectations) assert (prediction == [5, 2]) # Online update of model linucb.partial_fit(decisions=prediction, rewards=test_df_revenue, contexts=test) # Update the model with new arm linucb.add_arm(6) ################################################################### # LinUCB Learning Policy combined with Radius Neighborhood Policy