def test_partial_fit_indices(self):
    seed = 11
    n_dimensions = 5
    n_tables = 5
    rng = np.random.RandomState(seed)

    # Initial training data: 10 rows of 7-dimensional contexts
    contexts = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
    decisions = np.array([rng.randint(0, 2) for _ in range(10)])
    rewards = np.array([rng.rand() for _ in range(10)])

    lsh = MAB(arms=[0, 1],
              learning_policy=LearningPolicy.Softmax(),
              neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions, n_tables),
              seed=seed)
    lsh.fit(decisions, rewards, contexts)

    # A second batch updates the model online
    contexts2 = np.array([[rng.rand() for _ in range(7)] for _ in range(10)])
    decisions2 = np.array([rng.randint(0, 2) for _ in range(10)])
    rewards2 = np.array([rng.rand() for _ in range(10)])
    lsh.partial_fit(decisions2, rewards2, contexts2)

    # After partial_fit, rows from both batches (indices 0-19) should be
    # bucketed together in the first table's hash-to-index map
    self.assertListEqual(lsh._imp.table_to_hash_to_index[0][4], [1, 15, 16])
    self.assertListEqual(lsh._imp.table_to_hash_to_index[0][12], [9, 10, 11, 19])
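# ----------------------------------------------------------------------
# Aside: a hedged sketch of how a random-hyperplane LSH table can bucket
# contexts, to illustrate the table_to_hash_to_index structure asserted in
# the test above. The helpers make_planes and hash_context are illustrative
# names and assumptions, not MABWiser internals or API.
import numpy as np

def make_planes(rng, n_dimensions, n_features):
    # One random hyperplane per hash bit
    return rng.standard_normal((n_dimensions, n_features))

def hash_context(planes, context):
    # Each bit records which side of a hyperplane the context falls on;
    # the bits are packed into a single integer bucket key
    bits = (planes @ context) > 0
    return sum(1 << i for i, b in enumerate(bits) if b)

sketch_rng = np.random.RandomState(11)
planes = make_planes(sketch_rng, n_dimensions=5, n_features=7)
sketch_contexts = sketch_rng.rand(20, 7)

hash_to_index = {}
for i, ctx in enumerate(sketch_contexts):
    hash_to_index.setdefault(hash_context(planes, ctx), []).append(i)
# hash_to_index maps bucket keys to row indices; contexts on the same side
# of every hyperplane share a bucket, which is what the assertions above
# check for one of the model's tables.
# ----------------------------------------------------------------------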
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show
prediction = radius.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = radius.predict_expectations(test)

# Results
print("Radius: ", prediction, " ", expectations)
assert prediction == [4, 4]

# Online update of the model
radius.partial_fit(decisions=prediction, rewards=test_df_revenue, contexts=test)

# Update the model with the new arm
radius.add_arm(6)

########################################################
# KNearest Neighborhood Policy with UCB1 Learning Policy
########################################################

# KNearest context policy with k equal to 5 and UCB1 learning with alpha of 1.25
knearest = MAB(arms=ads,
               learning_policy=LearningPolicy.UCB1(alpha=1.25),
               neighborhood_policy=NeighborhoodPolicy.KNearest(k=5))

# Learn from previous ads shown and revenues generated
# (a conceptual sketch of the k-nearest idea follows below)
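# ----------------------------------------------------------------------
# Aside: a hedged sketch of the k-nearest idea (illustration only, not
# MABWiser's implementation): pick the k training rows closest to the test
# context and let the learning policy decide from those neighbors' rewards.
# k_nearest_indices and the Euclidean metric are assumptions of the sketch.
import numpy as np

def k_nearest_indices(train_contexts, test_context, k=5):
    # Distance from the test context to every training context
    distances = np.linalg.norm(train_contexts - test_context, axis=1)
    # Indices of the k closest training rows
    return np.argsort(distances)[:k]

sketch_rng = np.random.RandomState(0)
sketch_train = sketch_rng.rand(50, 3)
neighbors = k_nearest_indices(sketch_train, sketch_rng.rand(3), k=5)
# The learning policy (here UCB1) would then score arms using only the
# decisions and rewards at these neighbor indices.
# ----------------------------------------------------------------------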
# Predict the next best layout decision
prediction = greedy.predict()

# Expected revenues of each layout learnt from historical data based on the epsilon greedy policy
expectations = greedy.predict_expectations()

# Results
print("Epsilon Greedy: ", prediction, " ", expectations)
assert prediction == 1

# Additional historical data becomes available, which allows online learning
additional_layouts = [1, 2, 1, 2]
additional_revenues = [0, 12, 7, 19]

# Online update of the model
greedy.partial_fit(additional_layouts, additional_revenues)

# Add a new layout option
greedy.add_arm(3)

#################################################
# Randomized Popularity Learning Policy
#################################################

# Randomized Popularity learning policy that selects arms
# with weighted probability based on the mean reward of each arm
popularity = MAB(arms=options, learning_policy=LearningPolicy.Popularity(), seed=123456)

# Learn from previous layout decisions and revenues generated
# (a minimal sketch of the popularity weighting follows below)
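# ----------------------------------------------------------------------
# Aside: a hedged sketch of the randomized-popularity weighting: draw an
# arm with probability proportional to its mean observed reward. This is an
# illustration of the idea; popularity_choice is an assumed helper, not the
# library's LearningPolicy.Popularity() implementation.
import numpy as np

def popularity_choice(rng, arm_rewards):
    # Mean reward per arm
    means = {arm: float(np.mean(r)) for arm, r in arm_rewards.items()}
    total = sum(means.values())
    arms = list(means)
    # Selection probabilities proportional to the means
    probs = [means[arm] / total for arm in arms]
    return arms[rng.choice(len(arms), p=probs)]

sketch_rng = np.random.RandomState(123456)
# Rewards grouped by arm, using the additional data from the example above
arm_rewards = {1: [0, 7], 2: [12, 19]}
choice = popularity_choice(sketch_rng, arm_rewards)
# Arm 2 has the higher mean reward (15.5 vs 3.5), so it is drawn more often
# ----------------------------------------------------------------------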
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show
prediction = linucb.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = linucb.predict_expectations(test)

# Results
print("LinUCB: ", prediction, " ", expectations)
assert prediction == [5, 2]

# Online update of the model
linucb.partial_fit(decisions=prediction, rewards=test_df_revenue, contexts=test)

# Update the model with the new arm
linucb.add_arm(6)

###################################################################
# LinUCB Learning Policy combined with Radius Neighborhood Policy
###################################################################

# Radius context policy with radius equal to 1 and LinUCB learning with alpha of 1
radius = MAB(arms=ads,
             learning_policy=LearningPolicy.LinUCB(alpha=1),
             neighborhood_policy=NeighborhoodPolicy.Radius(radius=1))

# Learn from previous ads shown and revenues generated
# (a minimal sketch of the LinUCB score follows below)
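# ----------------------------------------------------------------------
# Aside: a hedged sketch of the textbook LinUCB score for a single arm,
# assuming the standard ridge form A = I + X^T X, b = X^T r, theta = A^{-1} b,
# and the bound theta.x + alpha * sqrt(x^T A^{-1} x). linucb_score is an
# illustrative helper, not MABWiser's internal implementation.
import numpy as np

def linucb_score(X, r, x, alpha=1.0):
    d = X.shape[1]
    A = np.eye(d) + X.T @ X           # ridge-regularized design matrix
    b = X.T @ r                       # reward-weighted context sum
    theta = np.linalg.solve(A, b)     # per-arm coefficient estimate
    bonus = alpha * np.sqrt(x @ np.linalg.solve(A, x))  # exploration width
    return theta @ x + bonus

sketch_rng = np.random.RandomState(7)
X = sketch_rng.rand(30, 4)            # observed contexts for one arm
r = sketch_rng.rand(30)               # rewards for that arm
score = linucb_score(X, r, sketch_rng.rand(4), alpha=1.0)
# The arm with the highest score is selected; alpha scales exploration.
# ----------------------------------------------------------------------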