def test_invalid_add_arm_scaler(self):
    # The scalers are never fit; add_arm() with an unfitted scaler should be rejected
    scaler = StandardScaler()
    arm_to_scaler = {0: deepcopy(scaler), 1: deepcopy(scaler)}
    mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler))
    with self.assertRaises(TypeError):
        mab.add_arm(2, scaler=deepcopy(scaler))
def test_add_arm_scaler(self):
    # A scaler that has been fit on context data can be passed to add_arm()
    scaler = StandardScaler()
    scaler.fit(np.array([[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]]).astype('float64'))
    arm_to_scaler = {0: deepcopy(scaler), 1: deepcopy(scaler)}
    mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler))
    mab.add_arm(2, scaler=deepcopy(scaler))
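# A minimal sketch (assumed usage, standing outside the test classes) of the
# contract the two scaler tests above pin down: a scaler supplied to LinUCB via
# arm_to_scaler, or to add_arm(), must already be fit on representative context data.
import numpy as np
from copy import deepcopy
from sklearn.preprocessing import StandardScaler
from mabwiser.mab import MAB, LearningPolicy

fitted = StandardScaler()
fitted.fit(np.array([[1, 2, 3], [3, 2, 1]], dtype='float64'))

bandit = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler={0: deepcopy(fitted),
                                                          1: deepcopy(fitted)}))
bandit.add_arm(2, scaler=deepcopy(fitted))  # fitted scaler: accepted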
def test_invalid_add_arm(self):
    mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))

    # None, NaN, and Inf are not valid arms
    with self.assertRaises(ValueError):
        mab.add_arm(None)
    with self.assertRaises(ValueError):
        mab.add_arm(np.nan)
    with self.assertRaises(ValueError):
        mab.add_arm(np.inf)

    # Adding an arm that already exists is also invalid
    with self.assertRaises(ValueError):
        mab.add_arm(3)
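# For contrast, a minimal sketch (assumed, mirroring the test above): any new,
# well-defined arm label is accepted.
mab = MAB([1, 2, 3], LearningPolicy.EpsilonGreedy(epsilon=0))
mab.add_arm(4)  # not None/NaN/Inf and not an existing arm, so no error is raised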
prediction = radius.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = radius.predict_expectations(test)

# Results
print("Radius: ", prediction, " ", expectations)
assert prediction == [4, 4]

# Online update of the model
radius.partial_fit(decisions=prediction, rewards=test_df_revenue, contexts=test)

# Update the model with a new arm
radius.add_arm(6)

########################################################
# KNearest Neighborhood Policy with UCB1 Learning Policy
########################################################

# KNearest context policy with k equal to 5 and UCB1 learning with alpha of 1.25
knearest = MAB(arms=ads,
               learning_policy=LearningPolicy.UCB1(alpha=1.25),
               neighborhood_policy=NeighborhoodPolicy.KNearest(k=5))

# Learn from previous ads shown and revenues generated
knearest.fit(decisions=train_df['ad'], rewards=train_df['revenues'], contexts=train)
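# A hedged continuation (not part of the original snippet): the KNearest model
# would typically be queried the same way as the Radius model above. Concrete
# predictions depend on the training data, so none are asserted here.
prediction = knearest.predict(test)
expectations = knearest.predict_expectations(test)
print("KNearest: ", prediction, " ", expectations)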
# Expected revenues of each layout learnt from historical data based on the epsilon greedy policy
expectations = greedy.predict_expectations()

# Results
print("Epsilon Greedy: ", prediction, " ", expectations)
assert prediction == 1

# Additional historical data becomes available, which allows online learning
additional_layouts = [1, 2, 1, 2]
additional_revenues = [0, 12, 7, 19]

# Online update of the model
greedy.partial_fit(additional_layouts, additional_revenues)

# Add a new layout option
greedy.add_arm(3)

#################################################
# Randomized Popularity Learning Policy
#################################################

# Randomized Popularity learning policy that selects arms
# with weighted probability based on the mean reward of each arm
popularity = MAB(arms=options, learning_policy=LearningPolicy.Popularity(), seed=123456)

# Learn from previous layout decisions and revenues generated
popularity.fit(decisions=layouts, rewards=revenues)

# Predict the next best layout decision
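# The step the comment above announces, completed here as a hedged sketch:
# Popularity is a context-free policy, so predict() takes no contexts.
prediction = popularity.predict()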
prediction = linucb.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = linucb.predict_expectations(test)

# Results
print("LinUCB: ", prediction, " ", expectations)
assert prediction == [5, 2]

# Online update of the model
linucb.partial_fit(decisions=prediction, rewards=test_df_revenue, contexts=test)

# Update the model with a new arm
linucb.add_arm(6)

###################################################################
# LinUCB Learning Policy combined with Radius Neighborhood Policy
###################################################################

# Radius context policy with radius equal to 1 and LinUCB learning with alpha of 1
radius = MAB(arms=ads,
             learning_policy=LearningPolicy.LinUCB(alpha=1),
             neighborhood_policy=NeighborhoodPolicy.Radius(radius=1))

# Learn from previous ads shown and revenues generated
radius.fit(decisions=train_df['ad'], rewards=train_df['revenues'], contexts=train)