Example #1
    def test_popularity(self):

        list_of_arms = ['Arm1', 'Arm2']
        decisions = ['Arm1', 'Arm1', 'Arm2', 'Arm1']
        rewards = [20, 17, 25, 9]
        mab = MAB(list_of_arms, LearningPolicy.Popularity())
        mab.fit(decisions, rewards)
        # First draw; Popularity predictions are probabilistic under the fixed seed
        mab.predict()
        self.assertEqual("Arm2", mab.predict())
        self.assertDictEqual(
            {
                'Arm1': 0.38016528925619836,
                'Arm2': 0.6198347107438016
            }, mab.predict_expectations())
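The expectations asserted above are consistent with each arm's mean reward normalized across the arms; a quick check with the same numbers (assuming that is how Popularity computes its expectations):

# Sanity check: the asserted expectations match normalized per-arm mean rewards
arm1_mean = (20 + 17 + 9) / 3   # mean reward over Arm1's three pulls
arm2_mean = 25 / 1              # mean reward over Arm2's single pull
total = arm1_mean + arm2_mean
print(arm1_mean / total)        # ~0.3802, matching 'Arm1' above
print(arm2_mean / total)        # ~0.6198, matching 'Arm2' above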
Example #2
    def predict(
        arms: List[Arm],
        decisions: Union[List, np.ndarray, pd.Series],
        rewards: Union[List, np.ndarray, pd.Series],
        learning_policy: Union[LearningPolicy.EpsilonGreedy,
                               LearningPolicy.Random, LearningPolicy.Softmax,
                               LearningPolicy.ThompsonSampling,
                               LearningPolicy.UCB1, LearningPolicy.LinTS,
                               LearningPolicy.LinUCB],
        neighborhood_policy: Union[None, NeighborhoodPolicy.Clusters,
                                   NeighborhoodPolicy.Radius,
                                   NeighborhoodPolicy.KNearest] = None,
        context_history: Union[None, List[Num], List[List[Num]], np.ndarray,
                               pd.DataFrame, pd.Series] = None,
        contexts: Union[None, List[Num], List[List[Num]], np.ndarray,
                        pd.DataFrame, pd.Series] = None,
        seed: Optional[int] = 123456,
        num_run: Optional[int] = 1,
        is_predict: Optional[bool] = True,
        n_jobs: Optional[int] = 1,
        backend: Optional[str] = None
    ) -> Tuple[Union[Arm, List[Arm], List[float], List[List[float]]], MAB]:
        """Sets up a MAB model and runs the given configuration.

        Return list of predictions or prediction and the mab instance, when is_predict is true
        Return list of expectations or expectation and the mab instance, when is predict is false

        Calls the predict or predict_expectation method num_run number of times.
        """

        # Model
        mab = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs,
                  backend)

        # Train
        mab.fit(decisions, rewards, context_history)

        # Test
        if is_predict:

            # Return: prediction(s) and the MAB instance
            predictions = [mab.predict(contexts) for _ in range(num_run)]
            return predictions[0] if num_run == 1 else predictions, mab

        else:

            # Return: expectations(s) and the MAB instance
            expectations = [
                mab.predict_expectations(contexts) for _ in range(num_run)
            ]
            return expectations[0] if num_run == 1 else expectations, mab
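A minimal call to this helper might look like the following; the arm names, decisions, and rewards are illustrative:

prediction, mab = predict(
    arms=['Arm1', 'Arm2'],
    decisions=['Arm1', 'Arm2', 'Arm1'],
    rewards=[10, 30, 20],
    learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.1))

# num_run defaults to 1, so a single prediction is returned along with the fitted model
print(prediction)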
Example #3
    def test_predict_without_fit(self):
        for lp in BaseTest.lps:
            with self.assertRaises(Exception):
                mab = MAB([1, 2], lp)
                mab.predict()

        for para_lp in BaseTest.para_lps:
            with self.assertRaises(Exception):
                mab = MAB([1, 2], para_lp)
                mab.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])

        for cp in BaseTest.nps:
            for lp in BaseTest.lps:
                with self.assertRaises(Exception):
                    mab = MAB([1, 2], lp, cp)
                    mab.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])

        for cp in BaseTest.nps:
            for para_lp in BaseTest.para_lps:
                with self.assertRaises(Exception):
                    mab = MAB([1, 2], para_lp, cp)
                    mab.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])
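The same guard can be exercised outside a test class; a minimal standalone sketch, assuming MABWiser's standard import path:

from mabwiser.mab import MAB, LearningPolicy

mab = MAB([1, 2], LearningPolicy.EpsilonGreedy(epsilon=0.1))
try:
    mab.predict()  # no fit() has happened yet, so this should raise
except Exception as e:
    print("Raised as expected:", type(e).__name__)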
Example #4
########################################################
# Radius Neighborhood Policy with UCB1 Learning Policy
########################################################

# Radius neighborhood policy with radius equal to 5 and UCB1 learning with alpha 1.25
radius = MAB(arms=ads,
             learning_policy=LearningPolicy.UCB1(alpha=1.25),
             neighborhood_policy=NeighborhoodPolicy.Radius(radius=5))

# Learn from previous ads shown and revenues generated
radius.fit(decisions=train_df['ad'],
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show
prediction = radius.predict(test)

# Expected revenue of each ad, learned from past ad revenues
expectations = radius.predict_expectations(test)

# Results
print("Radius: ", prediction, " ", expectations)
assert (prediction == [4, 4])

# Online update of the model
radius.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)

# Update the model with a new arm
radius.add_arm(6)
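The same setup also works with other neighborhood policies; a sketch swapping in KNearest, reusing the variables above (the k value is arbitrary):

# KNearest contextual policy with the same UCB1 learning policy
knearest = MAB(arms=ads,
               learning_policy=LearningPolicy.UCB1(alpha=1.25),
               neighborhood_policy=NeighborhoodPolicy.KNearest(k=5))
knearest.fit(decisions=train_df['ad'], rewards=train_df['revenues'], contexts=train)
print("KNearest: ", knearest.predict(test))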
Example #5
revenues = [10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10]

###################################
# Epsilon Greedy Learning Policy
###################################

# Epsilon Greedy learning policy with random exploration set to 15%
greedy = MAB(arms=options,
             learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15),
             seed=123456)

# Learn from previous layout decisions and the revenues they generated
greedy.fit(decisions=layouts, rewards=revenues)

# Predict the next best layout decision
prediction = greedy.predict()

# Expected revenue of each layout, learned from historical data under the epsilon greedy policy
expectations = greedy.predict_expectations()

# Results
print("Epsilon Greedy: ", prediction, " ", expectations)
assert (prediction == 1)

# Additional historical data becomes available, which allows online learning
additional_layouts = [1, 2, 1, 2]
additional_revenues = [0, 12, 7, 19]

# Online updating of the model
greedy.partial_fit(additional_layouts, additional_revenues)
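The same historical data can be fit under a different learning policy; a sketch with Softmax, reusing the variables above (the tau value is illustrative):

# Softmax learning policy with temperature tau
softmax = MAB(arms=options,
              learning_policy=LearningPolicy.Softmax(tau=1),
              seed=123456)
softmax.fit(decisions=layouts, rewards=revenues)
print("Softmax: ", softmax.predict())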
Example #6
##################################################
# Linear Upper Confidence Bound Learning Policy
##################################################

# LinUCB learning policy with alpha 1.25 and l2_lambda 1
linucb = MAB(arms=ads,
             learning_policy=LearningPolicy.LinUCB(alpha=1.25, l2_lambda=1))

# Learn from previous ads shown and revenues generated
linucb.fit(decisions=train_df['ad'],
           rewards=train_df['revenues'],
           contexts=train)

# Predict the next best ad to show
prediction = linucb.predict(test)

# Expected revenue of each ad, learned from past ad revenues
expectations = linucb.predict_expectations(test)

# Results
print("LinUCB: ", prediction, " ", expectations)
assert (prediction == [5, 2])

# Online update of the model
linucb.partial_fit(decisions=prediction,
                   rewards=test_df_revenue,
                   contexts=test)

# Update the model with a new arm
linucb.add_arm(6)
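LinUCB has a Thompson-sampling counterpart, LinTS, which takes the same data; a sketch with illustrative parameters:

# LinTS learning policy with alpha 1.25 and l2_lambda 1
lints = MAB(arms=ads,
            learning_policy=LearningPolicy.LinTS(alpha=1.25, l2_lambda=1))
lints.fit(decisions=train_df['ad'], rewards=train_df['revenues'], contexts=train)
print("LinTS: ", lints.predict(test))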
Example #7
import numpy as np
from sklearn.preprocessing import StandardScaler

# Fit one StandardScaler per arm on that arm's historical contexts
arm_to_scaler = {}
for arm in arms:
    # Get the indices of the historical decisions for this arm
    indices = np.where(decisions_train == arm)

    # Fit a standard scaler on the contexts observed for this arm
    scaler = StandardScaler()
    scaler.fit(contexts_train[indices])
    arm_to_scaler[arm] = scaler

########################################################
# LinUCB Learning Policy
########################################################

# LinUCB learning policy with alpha 1.25, per-arm scalers, and n_jobs=-1 (all available cores)
linucb = MAB(arms=arms,
             learning_policy=LearningPolicy.LinUCB(
                 alpha=1.25, arm_to_scaler=arm_to_scaler),
             n_jobs=-1)

# Learn from playlists shown and observed click rewards for each arm
linucb.fit(decisions=decisions_train,
           rewards=rewards_train,
           contexts=contexts_train)

# Predict the next best playlist to recommend
prediction = linucb.predict(contexts_test)

# Results
print("LinUCB: ", prediction[:10])