Beispiel #1
0
    def test_invalid_ridge_l2_lambda_type(self):
        """A None ridge penalty (l2_lambda) must raise TypeError for both
        linear learning policies."""

        # Same invalid l2_lambda exercised against LinUCB and LinTS.
        # The policy is constructed inside the context manager so the
        # TypeError is caught regardless of where validation happens.
        for linear_policy in (LearningPolicy.LinUCB, LearningPolicy.LinTS):
            with self.assertRaises(TypeError):
                self.predict(arms=[1, 2, 3],
                             decisions=[1, 1, 1],
                             rewards=[0, 0, 0],
                             learning_policy=linear_policy(alpha=1,
                                                           l2_lambda=None),
                             neighborhood_policy=NeighborhoodPolicy.KNearest(2),
                             context_history=np.array([1, 1, 1]),
                             contexts=np.array([[1, 1]]),
                             seed=123456,
                             num_run=1,
                             is_predict=True)
Beispiel #2
0
    def test_lints(self):
        """End-to-end LinTS run: scale features, predict, update the model
        online with partial_fit, then add a new arm."""

        # Historical ad impressions: which ad was shown, the revenue it
        # generated, and the user features at the time.
        history = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57,
                    38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]
        })

        # Two unseen users to predict for, and the revenue they go on to
        # generate (used for the online update below).
        new_users = pd.DataFrame({'age': [37, 52],
                                  'click_rate': [0.5, 0.6],
                                  'subscriber': [0, 1]})
        new_revenues = pd.Series([7, 13])

        # Standardize the context features: fit on history, reuse on test.
        scaler = StandardScaler()
        feature_cols = ['age', 'click_rate', 'subscriber']
        contexts_train = scaler.fit_transform(
            np.asarray(history[feature_cols], dtype='float64'))
        contexts_test = scaler.transform(
            np.asarray(new_users, dtype='float64'))

        arms, mab = self.predict(arms=[1, 2, 3, 4, 5],
                                 decisions=history['ad'],
                                 rewards=history['revenues'],
                                 learning_policy=LearningPolicy.LinTS(
                                     alpha=1.5),
                                 context_history=contexts_train,
                                 contexts=contexts_test,
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)

        self.assertEqual(arms, [5, 2])

        # Online update with the freshly observed revenues.
        mab.partial_fit(decisions=arms, rewards=new_revenues,
                        contexts=contexts_test)

        # A newly added arm must be visible publicly and internally.
        mab.add_arm(6)
        self.assertTrue(6 in mab.arms)
        self.assertTrue(6 in mab._imp.arm_to_expectation.keys())
Beispiel #3
0
    def test_invalid_lp_arg(self):
        """Passing a keyword a learning policy does not define must raise
        TypeError."""

        # (policy constructor, invalid keyword arguments) pairs, checked in
        # the same order as before.
        bad_configurations = [
            (LearningPolicy.UCB1, {'epsilon': 2}),
            (LearningPolicy.EpsilonGreedy, {'alpha': 2}),
            (LearningPolicy.ThompsonSampling, {'alpha': 2}),
            (LearningPolicy.Softmax, {'alpha': 2}),
            (LearningPolicy.LinUCB, {'tau': 1}),
            (LearningPolicy.LinTS, {'epsilon': 1}),
        ]

        for policy, kwargs in bad_configurations:
            with self.assertRaises(TypeError):
                # Constructed inside the context manager: the TypeError may
                # come from the policy constructor itself or from MAB.
                MAB(['a', 'b'], policy(**kwargs))
Beispiel #4
0
    def test_lints_knearest(self):
        """LinTS combined with a KNearest(k=4) neighborhood policy."""

        # Historical ad impressions: which ad was shown, the revenue it
        # generated, and the user features at the time.
        history = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57,
                    38],
            'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                           0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]
        })

        # Two unseen users to predict for.
        new_users = pd.DataFrame({'age': [37, 52],
                                  'click_rate': [0.5, 0.6],
                                  'subscriber': [0, 1]})

        # Standardize the context features: fit on history, reuse on test.
        scaler = StandardScaler()
        contexts_train = scaler.fit_transform(
            np.asarray(history[['age', 'click_rate', 'subscriber']],
                       dtype='float64'))
        contexts_test = scaler.transform(
            np.asarray(new_users, dtype='float64'))

        arms, _ = self.predict(arms=[1, 2, 3, 4, 5],
                               decisions=history['ad'],
                               rewards=history['revenues'],
                               learning_policy=LearningPolicy.LinTS(alpha=1),
                               neighborhood_policy=NeighborhoodPolicy.KNearest(
                                   k=4),
                               context_history=contexts_train,
                               contexts=contexts_test,
                               seed=123456,
                               num_run=1,
                               is_predict=True)

        self.assertEqual(arms, [1, 2])
Beispiel #5
0
class BaseTest(unittest.TestCase):
    """Base class providing canonical policy fixtures and a predict() driver
    shared by the concrete test cases."""

    # Valid context-free learning policies, covering parameter boundaries
    # (0, 0.0, 0.5, 1, 1.0, and beyond where applicable).
    lps = [
        LearningPolicy.EpsilonGreedy(),
        LearningPolicy.EpsilonGreedy(epsilon=0),
        LearningPolicy.EpsilonGreedy(epsilon=0.0),
        LearningPolicy.EpsilonGreedy(epsilon=0.5),
        LearningPolicy.EpsilonGreedy(epsilon=1),
        LearningPolicy.EpsilonGreedy(epsilon=1.0),
        LearningPolicy.Random(),
        LearningPolicy.Softmax(),
        LearningPolicy.Softmax(tau=0.1),
        LearningPolicy.Softmax(tau=0.5),
        LearningPolicy.Softmax(tau=1),
        LearningPolicy.Softmax(tau=1.0),
        LearningPolicy.Softmax(tau=5.0),
        LearningPolicy.ThompsonSampling(),
        LearningPolicy.UCB1(),
        LearningPolicy.UCB1(alpha=0),
        LearningPolicy.UCB1(alpha=0.0),
        LearningPolicy.UCB1(alpha=0.5),
        LearningPolicy.UCB1(alpha=1),
        LearningPolicy.UCB1(alpha=1.0),
        LearningPolicy.UCB1(alpha=5)
    ]

    # Valid parametric (linear) learning policies with alpha/l2_lambda
    # combinations.
    para_lps = [
        LearningPolicy.LinTS(alpha=0.00001, l2_lambda=1),
        LearningPolicy.LinTS(alpha=0.5, l2_lambda=1),
        LearningPolicy.LinTS(alpha=1, l2_lambda=1),
        LearningPolicy.LinTS(alpha=0.00001, l2_lambda=0.5),
        LearningPolicy.LinTS(alpha=0.5, l2_lambda=0.5),
        LearningPolicy.LinTS(alpha=1, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=0, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=0.5, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=1, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=0, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=0.5, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=1, l2_lambda=0.5)
    ]

    # Valid neighborhood-based context policies.
    nps = [
        NeighborhoodPolicy.KNearest(),
        NeighborhoodPolicy.KNearest(k=1),
        NeighborhoodPolicy.KNearest(k=3),
        NeighborhoodPolicy.Radius(),
        NeighborhoodPolicy.Radius(2.5),
        NeighborhoodPolicy.Radius(5)
    ]

    # Valid clustering-based context policies.
    cps = [
        NeighborhoodPolicy.Clusters(),
        NeighborhoodPolicy.Clusters(n_clusters=3),
        NeighborhoodPolicy.Clusters(is_minibatch=True),
        NeighborhoodPolicy.Clusters(n_clusters=3, is_minibatch=True)
    ]

    @staticmethod
    def predict(
        arms: List[Arm],
        decisions: Union[List, np.ndarray, pd.Series],
        rewards: Union[List, np.ndarray, pd.Series],
        learning_policy: Union[LearningPolicy.EpsilonGreedy,
                               LearningPolicy.Random, LearningPolicy.Softmax,
                               LearningPolicy.ThompsonSampling,
                               LearningPolicy.UCB1, LearningPolicy.LinTS,
                               LearningPolicy.LinUCB],
        neighborhood_policy: Union[None, NeighborhoodPolicy.Clusters,
                                   NeighborhoodPolicy.Radius,
                                   NeighborhoodPolicy.KNearest] = None,
        context_history: Union[None, List[Num], List[List[Num]], np.ndarray,
                               pd.DataFrame, pd.Series] = None,
        contexts: Union[None, List[Num], List[List[Num]], np.ndarray,
                        pd.DataFrame, pd.Series] = None,
        seed: Optional[int] = 123456,
        num_run: Optional[int] = 1,
        is_predict: Optional[bool] = True,
        n_jobs: Optional[int] = 1,
        backend: Optional[str] = None
    ) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
        """Set up a MAB model and run the given configuration.

        Returns (prediction(s), mab) when is_predict is true and
        (expectation(s), mab) otherwise. The first element is the single
        result when num_run == 1, or the list of per-run results.
        """

        # Build and train the bandit under test.
        mab = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs,
                  backend)
        mab.fit(decisions, rewards, context_history)

        # predict() yields arm choices; predict_expectations() yields
        # per-arm scores. Either is called num_run times.
        query = mab.predict if is_predict else mab.predict_expectations
        results = [query(contexts) for _ in range(num_run)]

        # Unwrap single-run results for caller convenience.
        if num_run == 1:
            return results[0], mab
        return results, mab

    def assertListAlmostEqual(self, list1, list2):
        """Assert that two sequences have equal length and element-wise
        (almost) equal floating point values."""
        seq1 = list1 if isinstance(list1, list) else list(list1)
        seq2 = list2 if isinstance(list2, list) else list(list2)

        self.assertEqual(len(seq1), len(seq2))

        for expected, actual in zip(seq1, seq2):
            self.assertAlmostEqual(expected, actual)
Beispiel #6
0
# Predict the next best ad with the KNearest model fit above
prediction = knearest.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = knearest.predict_expectations(test)

# Results
print("KNearest: ", prediction, " ", expectations)
assert (prediction == [1, 2])

##################################################
# Linear Thompson Sampling Learning Policy
##################################################

# LinTS learning policy with alpha 1.5 and l2_lambda 1
lints = MAB(arms=ads,
            learning_policy=LearningPolicy.LinTS(alpha=1.5, l2_lambda=1))

# Learn from previous ads shown and revenues generated
lints.fit(decisions=train_df['ad'],
          rewards=train_df['revenues'],
          contexts=train)

# Predict the next best ad to show
prediction = lints.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = lints.predict_expectations(test)

# Results
print("LinTS: ", prediction, " ", expectations)
assert (prediction == [5, 2])