Beispiel #1
0
    def test_partial_fit_indices(self):
        """Pin the LSHNearest row indices after ``fit`` followed by ``partial_fit``.

        Two batches of 10 random observations (7 context values each,
        decisions drawn from {0, 1}) come from a single seeded generator.
        The first batch goes to ``fit``, the second to ``partial_fit``.
        The test then compares the index lists stored under two hash codes
        of the first table with their expected values.
        """
        seed = 11
        n_dimensions = 5
        n_tables = 5
        n_rows, n_features = 10, 7
        rng = np.random.RandomState(seed)

        def draw_batch():
            # Draw order (contexts, decisions, rewards) and the one-value-at-a-time
            # calls are kept on purpose: the expected indices below depend on the
            # exact sequence of numbers pulled from ``rng``.
            batch_contexts = np.array([[rng.rand() for _ in range(n_features)]
                                       for _ in range(n_rows)])
            batch_decisions = np.array([rng.randint(0, 2) for _ in range(n_rows)])
            batch_rewards = np.array([rng.rand() for _ in range(n_rows)])
            return batch_decisions, batch_rewards, batch_contexts

        # Initial training batch is drawn before the model is built
        first_batch = draw_batch()
        neighborhood = NeighborhoodPolicy.LSHNearest(n_dimensions, n_tables)
        lsh = MAB(arms=[0, 1],
                  learning_policy=LearningPolicy.Softmax(),
                  neighborhood_policy=neighborhood,
                  seed=seed)
        lsh.fit(*first_batch)

        # Second batch is drawn only after the initial fit, then applied online
        second_batch = draw_batch()
        lsh.partial_fit(*second_batch)

        # Expected index lists per hash code in table 0
        expected_indices = {
            4: [1, 15, 16],
            12: [9, 10, 11, 19],
        }
        first_table = lsh._imp.table_to_hash_to_index[0]
        for hash_code, indices in expected_indices.items():
            self.assertListEqual(first_table[hash_code], indices)
Beispiel #2
0
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show for the contexts in `test`
# (`radius` and `test` are defined earlier in the script, outside this excerpt)
prediction = radius.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = radius.predict_expectations(test)

# Results: print both values, then check the prediction against the expected ads
# NOTE(review): the exact-match assert presumably relies on a deterministic model — confirm
print("Radius: ", prediction, " ", expectations)
assert (prediction == [4, 4])

# Online update of the model: the predicted ads are fed back as decisions,
# with `test_df_revenue` as the rewards and the same `test` contexts
radius.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)

# Add a new arm (6) to the model after the online update
radius.add_arm(6)

########################################################
# KNearest Neighborhood Policy with UCB1 Learning Policy
########################################################

# KNearest neighborhood policy with k equal to 5, combined with UCB1 learning
# with alpha of 1.25; the arms are taken from `ads` (defined outside this excerpt)
knearest = MAB(arms=ads,
               learning_policy=LearningPolicy.UCB1(alpha=1.25),
               neighborhood_policy=NeighborhoodPolicy.KNearest(k=5))

# Learn from previous ads shown and revenues generated
Beispiel #3
0
# Predict the next best layout decision
# (`greedy` is defined earlier in the script, outside this excerpt)
prediction = greedy.predict()

# Expected revenue of each layout, learnt from historical data under the epsilon greedy policy
expectations = greedy.predict_expectations()

# Results: print both values, then check that layout 1 was predicted
# NOTE(review): the exact-match assert presumably relies on a seeded model — confirm
print("Epsilon Greedy: ", prediction, " ", expectations)
assert (prediction == 1)

# Additional historical data becomes available, which allows online learning:
# four more layout decisions and the revenue observed for each
additional_layouts = [1, 2, 1, 2]
additional_revenues = [0, 12, 7, 19]

# Online update of the model with the additional decisions and rewards
greedy.partial_fit(additional_layouts, additional_revenues)

# Add a new layout option (arm 3) to the model
greedy.add_arm(3)

#################################################
# Randomized Popularity Learning Policy
#################################################

# Randomized Popularity learning policy that selects arms
# with weighted probability based on the mean reward for each arm.
# The arms come from `options` (defined outside this excerpt) and the seed is fixed.
popularity = MAB(arms=options,
                 learning_policy=LearningPolicy.Popularity(),
                 seed=123456)

# Learn from previous layouts decisions and revenues generated
Beispiel #4
0
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show for the contexts in `test`
# (`linucb` and `test` are defined earlier in the script, outside this excerpt)
prediction = linucb.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = linucb.predict_expectations(test)

# Results: print both values, then check the prediction against the expected ads
# NOTE(review): the exact-match assert presumably relies on a deterministic model — confirm
print("LinUCB: ", prediction, " ", expectations)
assert (prediction == [5, 2])

# Online update of the model: the predicted ads are fed back as decisions,
# with `test_df_revenue` as the rewards and the same `test` contexts
linucb.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)

# Add a new arm (6) to the model after the online update
linucb.add_arm(6)

###################################################################
# LinUCB Learning Policy combined with Radius Neighborhood Policy
###################################################################

# Radius neighborhood policy with radius equal to 1, combined with LinUCB learning
# with alpha of 1; the arms are taken from `ads` (defined outside this excerpt)
radius = MAB(arms=ads,
             learning_policy=LearningPolicy.LinUCB(alpha=1),
             neighborhood_policy=NeighborhoodPolicy.Radius(radius=1))

# Learn from previous ads shown and revenues generated