def test_popularity(self):
    """Fit a Popularity bandit on a two-arm history and check its outputs.

    Asserts the arm returned by the second consecutive ``predict()`` call
    and the exact per-arm values returned by ``predict_expectations()``.
    """
    arm_names = ['Arm1', 'Arm2']
    past_decisions = ['Arm1', 'Arm1', 'Arm2', 'Arm1']
    past_rewards = [20, 17, 25, 9]
    expected_expectations = {
        'Arm1': 0.38016528925619836,
        'Arm2': 0.6198347107438016,
    }

    bandit = MAB(arm_names, LearningPolicy.Popularity())
    bandit.fit(past_decisions, past_rewards)

    # The result of the first call is deliberately discarded; the assertion
    # is made on the second call.
    # NOTE(review): presumably the first call advances the model's random
    # state, so removing it would change the asserted arm -- confirm.
    bandit.predict()
    second_prediction = bandit.predict()

    self.assertEqual("Arm2", second_prediction)
    self.assertDictEqual(expected_expectations, bandit.predict_expectations())
def predict(
    arms: List[Arm],
    decisions: Union[List, np.ndarray, pd.Series],
    rewards: Union[List, np.ndarray, pd.Series],
    learning_policy: Union[LearningPolicy.EpsilonGreedy,
                           LearningPolicy.Random,
                           LearningPolicy.Softmax,
                           LearningPolicy.ThompsonSampling,
                           LearningPolicy.UCB1,
                           LearningPolicy.LinTS,
                           LearningPolicy.LinUCB],
    neighborhood_policy: Union[None,
                               NeighborhoodPolicy.Clusters,
                               NeighborhoodPolicy.Radius,
                               NeighborhoodPolicy.KNearest] = None,
    context_history: Union[None, List[Num], List[List[Num]],
                           np.ndarray, pd.DataFrame, pd.Series] = None,
    contexts: Union[None, List[Num], List[List[Num]],
                    np.ndarray, pd.DataFrame, pd.Series] = None,
    seed: Optional[int] = 123456,
    num_run: Optional[int] = 1,
    is_predict: Optional[bool] = True,
    n_jobs: Optional[int] = 1,
    backend: Optional[str] = None
) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
    """Build a MAB, fit it, and query it ``num_run`` times.

    The model is constructed from ``arms``, ``learning_policy``,
    ``neighborhood_policy``, ``seed``, ``n_jobs`` and ``backend``, then
    trained with ``decisions``, ``rewards`` and ``context_history``.

    When ``is_predict`` is true, ``mab.predict(contexts)`` is called
    ``num_run`` times; otherwise ``mab.predict_expectations(contexts)`` is
    called ``num_run`` times.

    Returns a 2-tuple ``(result, mab)``. ``result`` is the single output
    when ``num_run == 1`` and the list of all outputs otherwise; ``mab`` is
    the fitted model instance.
    """
    # Model
    bandit = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs, backend)

    # Train
    bandit.fit(decisions, rewards, context_history)

    # Test: collect one output per run, either predictions or expectations
    if is_predict:
        outputs = [bandit.predict(contexts) for _ in range(num_run)]
    else:
        outputs = [bandit.predict_expectations(contexts) for _ in range(num_run)]

    # A single run is unwrapped from its one-element list
    if num_run == 1:
        return outputs[0], bandit
    return outputs, bandit
def test_predict_without_fit(self):
    """Calling ``predict`` on a model that was never fit must raise.

    Four configurations are exercised, using the policy collections
    ``BaseTest.lps``, ``BaseTest.para_lps`` and ``BaseTest.nps`` (defined
    outside this method; presumably context-free learning policies,
    parametric learning policies and neighborhood policies respectively):

    1. each policy in ``lps`` alone, predicting without contexts;
    2. each policy in ``para_lps`` alone, predicting with contexts;
    3. each policy in ``lps`` combined with each policy in ``nps``;
    4. each policy in ``para_lps`` combined with each policy in ``nps``.

    The model is constructed inside the ``assertRaises`` block, so an
    exception raised by the constructor also satisfies the assertion.
    """
    # Two context rows, shared by every contextual prediction below
    contexts = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]

    for lp in BaseTest.lps:
        with self.assertRaises(Exception):
            mab = MAB([1, 2], lp)
            mab.predict()

    for para_lp in BaseTest.para_lps:
        with self.assertRaises(Exception):
            mab = MAB([1, 2], para_lp)
            mab.predict(contexts)

    for cp in BaseTest.nps:
        for lp in BaseTest.lps:
            with self.assertRaises(Exception):
                mab = MAB([1, 2], lp, cp)
                mab.predict(contexts)

    # Previously this nest iterated BaseTest.lps again, which made it an
    # exact duplicate of the nest above and left the parametric policies
    # untested in combination with neighborhood policies.
    for cp in BaseTest.nps:
        for para_lp in BaseTest.para_lps:
            with self.assertRaises(Exception):
                mab = MAB([1, 2], para_lp, cp)
                mab.predict(contexts)
########################################################
# Radius Neighborhood Policy with UCB1 Learning Policy
########################################################

# Contextual bandit: Radius neighborhood (radius=5) combined with a
# UCB1 learning policy (alpha=1.25)
radius = MAB(
    arms=ads,
    learning_policy=LearningPolicy.UCB1(alpha=1.25),
    neighborhood_policy=NeighborhoodPolicy.Radius(radius=5),
)

# Train on the previously shown ads, the revenues they generated and the
# matching training contexts
radius.fit(
    decisions=train_df['ad'],
    rewards=train_df['revenues'],
    contexts=train,
)

# Best ad to show for each test context
prediction = radius.predict(test)

# Per-ad expectations for each test context
expectations = radius.predict_expectations(test)

# Results
print("Radius: ", prediction, " ", expectations)
assert prediction == [4, 4]

# Online update: feed the predicted ads and their observed revenues back in
radius.partial_fit(
    decisions=prediction,
    rewards=test_df_revenue,
    contexts=test,
)

# Extend the model with a new arm
radius.add_arm(6)
revenues = [10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10]

###################################
# Epsilon Greedy Learning Policy
###################################

# Epsilon Greedy bandit exploring at random 15% of the time (epsilon=0.15)
greedy = MAB(
    arms=options,
    learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15),
    seed=123456,
)

# Train on the historical layout decisions and the revenues they generated
greedy.fit(decisions=layouts, rewards=revenues)

# Next best layout decision
prediction = greedy.predict()

# Expected revenue of each layout, as learnt from the historical data
expectations = greedy.predict_expectations()

# Results
print("Epsilon Greedy: ", prediction, " ", expectations)
assert prediction == 1

# Additional historical data becomes available, enabling online learning
additional_layouts = [1, 2, 1, 2]
additional_revenues = [0, 12, 7, 19]

# Online update of the model with the additional data
greedy.partial_fit(decisions=additional_layouts, rewards=additional_revenues)
##################################################
# Linear Upper Confidence Bound Learning Policy
##################################################

# LinUCB bandit with alpha=1.25 and l2_lambda=1
linucb = MAB(
    arms=ads,
    learning_policy=LearningPolicy.LinUCB(alpha=1.25, l2_lambda=1),
)

# Train on the previously shown ads, the revenues they generated and the
# matching training contexts
linucb.fit(
    decisions=train_df['ad'],
    rewards=train_df['revenues'],
    contexts=train,
)

# Best ad to show for each test context
prediction = linucb.predict(test)

# Per-ad expectations for each test context
expectations = linucb.predict_expectations(test)

# Results
print("LinUCB: ", prediction, " ", expectations)
assert prediction == [5, 2]

# Online update: feed the predicted ads and their observed revenues back in
linucb.partial_fit(
    decisions=prediction,
    rewards=test_df_revenue,
    contexts=test,
)

# Extend the model with a new arm
linucb.add_arm(6)
# Fit one StandardScaler per arm, using only the training contexts of the
# events where that arm was the decision.
arm_to_scaler = {}
for arm in arms:
    # Positions (within the training split) where this arm was chosen
    indices = np.where(decisions_train == arm)

    # Fit standard scaler.
    # The positions above refer to decisions_train, so they must index
    # contexts_train (the array passed together with decisions_train to
    # fit below). Indexing a differently-ordered/sized contexts array would
    # select rows unrelated to this arm.
    scaler = StandardScaler()
    scaler.fit(contexts_train[indices])
    arm_to_scaler[arm] = scaler

########################################################
# LinUCB Learning Policy
########################################################

# LinUCB learning policy with alpha 1.25, the per-arm scalers fitted above,
# and n_jobs = -1 (maximum available cores)
linucb = MAB(
    arms=arms,
    learning_policy=LearningPolicy.LinUCB(alpha=1.25, arm_to_scaler=arm_to_scaler),
    n_jobs=-1,
)

# Learn from playlists shown and observed click rewards for each arm
linucb.fit(decisions=decisions_train, rewards=rewards_train, contexts=contexts_train)

# Predict the next best playlist to recommend
prediction = linucb.predict(contexts_test)

# Results (first ten predictions only)
print("LinUCB: ", prediction[:10])
revenues = [10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10]

###################################
# Epsilon Greedy Learning Policy
###################################

# Epsilon Greedy bandit exploring at random 15% of the time (epsilon=0.15)
greedy = MAB(
    arms=options,
    learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15),
    seed=123456,
)

# Train on the historical layout decisions and the revenues they generated
greedy.fit(decisions=layouts, rewards=revenues)

# Next best layout decision
prediction = greedy.predict()

# Expected revenue of each layout, as learnt from the historical data
expectations = greedy.predict_expectations()

# Results
print("Epsilon Greedy: ", prediction, " ", expectations)
assert prediction == 1

# Additional historical data becomes available, enabling online learning
additional_layouts = [1, 2, 1, 2]
additional_revenues = [0, 12, 7, 19]

# Online update of the model with the additional data
greedy.partial_fit(decisions=additional_layouts, rewards=additional_revenues)