Code Example #1
File: test_parallel.py  Project: ccoffrin/mabwiser
    def test_linUCB(self):

        rng = np.random.RandomState(seed=111)
        contexts = rng.randint(0, 5, (10, 5))

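        # Predict in parallel with n_jobs=2 and the default joblib backend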
        arm, mab = self.predict(
            arms=[1, 2, 3, 4, 5],
            decisions=[1, 1, 4, 2, 2, 2, 3, 3, 3, 1],
            rewards=[0, 0, 1, 0, 0, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=0.1),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=contexts,
            seed=123456,
            num_run=1,
            is_predict=True,
            n_jobs=2,
            backend=None)

        self.assertEqual(arm, [4, 4, 3, 3, 4, 4, 4, 3, 4, 3])

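        # Same data and seed with the 'loky' backend; the prediction should match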
        arm, mab = self.predict(
            arms=[1, 2, 3, 4, 5],
            decisions=[1, 1, 4, 2, 2, 2, 3, 3, 3, 1],
            rewards=[0, 0, 1, 0, 0, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=0.1),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=contexts,
            seed=123456,
            num_run=1,
            is_predict=True,
            n_jobs=2,
            backend='loky')

        self.assertEqual(arm, [4, 4, 3, 3, 4, 4, 4, 3, 4, 3])

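        # Same data and seed with the 'threading' backend; the prediction should match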
        arm, mab = self.predict(
            arms=[1, 2, 3, 4, 5],
            decisions=[1, 1, 4, 2, 2, 2, 3, 3, 3, 1],
            rewards=[0, 0, 1, 0, 0, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=0.1),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=contexts,
            seed=123456,
            num_run=1,
            is_predict=True,
            n_jobs=2,
            backend='threading')

        self.assertEqual(arm, [4, 4, 3, 3, 4, 4, 4, 3, 4, 3])
Code Example #2
    def test_partial_vs_batch_fit(self):

        # Batch fit
        context_batch = np.array([[1, 0, 0, 0, 1], [0, 1, 2, 3, 4],
                                  [2, 0, 1, 0, 2], [2, 1, 2, 1, 2],
                                  [3, 3, 3, 2, 1], [1, 1, 1, 1, 1]])
        rewards_batch = np.array([0, 1, 1, 0, 1, 0])
        decisions_batch = np.array([1, 1, 1, 0, 0, 1])

        arms_batch, mab_batch = self.predict(
            arms=[0, 1],
            decisions=decisions_batch,
            rewards=rewards_batch,
            learning_policy=LearningPolicy.LinUCB(alpha=1),
            context_history=context_batch,
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        # Partial fit
        context = np.array([[1, 0, 0, 0, 1], [0, 1, 2, 3, 4], [2, 0, 1, 0, 2]])
        rewards = np.array([0, 1, 1])
        decisions = np.array([1, 1, 1])

        arms_partial, mab_partial = self.predict(
            arms=[0, 1],
            decisions=decisions,
            rewards=rewards,
            learning_policy=LearningPolicy.LinUCB(alpha=1),
            context_history=context,
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        context2 = np.array([[2, 1, 2, 1, 2], [3, 3, 3, 2, 1],
                             [1, 1, 1, 1, 1]])
        rewards2 = np.array([0, 1, 0])
        decisions2 = np.array([0, 0, 1])

        mab_partial.partial_fit(decisions2, rewards2, context2)

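        # Batch and partial fits should agree on per-arm beta, Xty, and A_inv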
        self.assertListEqual(mab_batch._imp.arm_to_model[0].beta.tolist(),
                             mab_partial._imp.arm_to_model[0].beta.tolist())
        self.assertListEqual(mab_batch._imp.arm_to_model[0].Xty.tolist(),
                             mab_partial._imp.arm_to_model[0].Xty.tolist())
        self.assertListEqual(mab_batch._imp.arm_to_model[0].A_inv.tolist(),
                             mab_partial._imp.arm_to_model[0].A_inv.tolist())

        self.assertListEqual(mab_batch._imp.arm_to_model[1].beta.tolist(),
                             mab_partial._imp.arm_to_model[1].beta.tolist())
        self.assertListEqual(mab_batch._imp.arm_to_model[1].Xty.tolist(),
                             mab_partial._imp.arm_to_model[1].Xty.tolist())
        self.assertListEqual(mab_batch._imp.arm_to_model[1].A_inv.tolist(),
                             mab_partial._imp.arm_to_model[1].A_inv.tolist())
Code Example #3
    def test_scaler(self):

        arms = [1, 2, 3]
        context_history = np.array(
            [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
             [0, 2, 1, 0, 0]],
            dtype='float64')

        contexts = np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])
        decisions = np.array([1, 1, 1, 2, 2, 3, 3, 3, 3, 3])
        rewards = np.array([0, 0, 1, 0, 0, 0, 0, 1, 1, 1])

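        # Fit a separate standard scaler per arm on that arm's context history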
        arm_to_scaler = {}
        for arm in arms:
            scaler = StandardScaler()
            df = context_history[decisions == arm]
            scaler.fit(np.asarray(df, dtype='float64'))
            arm_to_scaler[arm] = deepcopy(scaler)

        exp, mab = self.predict(
            arms=arms,
            decisions=decisions,
            rewards=rewards,
            learning_policy=LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler),
            context_history=context_history,
            contexts=contexts,
            seed=123456,
            num_run=1,
            is_predict=False)

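        # Scaling the contexts manually and running plain LinUCB should
        # reproduce the same per-arm expectations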
        for arm in arms:

            context_history_arm = context_history[decisions == arm]
            context_history_scaled = arm_to_scaler[arm].transform(
                np.asarray(context_history_arm, dtype='float64'))

            contexts_scaled = arm_to_scaler[arm].transform(
                np.asarray(contexts, dtype='float64'))

            exp_check, mab = self.predict(
                arms=arms,
                decisions=decisions[decisions == arm],
                rewards=rewards[decisions == arm],
                learning_policy=LearningPolicy.LinUCB(),
                context_history=context_history_scaled,
                contexts=contexts_scaled,
                seed=123456,
                num_run=1,
                is_predict=False)

            for i in range(len(contexts)):
                self.assertEqual(exp[i][arm], exp_check[i][arm])
Code Example #4
    def test_linucb_t5(self):

        arm, mab = self.predict(arms=['one', 'two', 'three'],
                                decisions=[
                                    'one', 'one', 'one', 'three', 'two', 'two',
                                    'three', 'one', 'three', 'two'
                                ],
                                rewards=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
                                learning_policy=LearningPolicy.LinUCB(alpha=1),
                                context_history=[[0, 1, 2, 3, 5],
                                                 [1, 1, 1, 1, 1],
                                                 [0, 0, 1, 0, 0],
                                                 [0, 2, 2, 3, 5],
                                                 [1, 3, 1, 1, 1],
                                                 [0, 0, 0, 0, 0],
                                                 [0, 1, 4, 3, 5],
                                                 [0, 1, 2, 4, 5],
                                                 [1, 2, 1, 1, 3],
                                                 [0, 2, 1, 0, 0]],
                                contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                seed=23,
                                num_run=4,
                                is_predict=True)

        self.assertEqual(len(arm), 4)
        self.assertEqual(
            arm,
            [['two', 'two'], ['two', 'two'], ['two', 'two'], ['two', 'two']])
Code Example #5
    def test_add_arm_scaler(self):
        scaler = StandardScaler()
        scaler.fit(
            np.array([[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]]).astype('float64'))
        arm_to_scaler = {0: deepcopy(scaler), 1: deepcopy(scaler)}
        mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler))
        mab.add_arm(2, scaler=deepcopy(scaler))
Code Example #6
    def test_add_arm(self):

        context = np.array([[1, 0, 2, 1, 1], [3, 1, 2, 3, 4],
                            [2, -1, 1, 0, 2]])
        rewards = np.array([3, 3, 1])
        decisions = np.array([1, 1, 1])
        arms, mab = self.predict(
            arms=[0, 1],
            decisions=decisions,
            rewards=rewards,
            learning_policy=LearningPolicy.LinUCB(alpha=1),
            context_history=context,
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertEqual(mab._imp.num_features, 5)
        self.assertEqual(arms, [0, 0])
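        # A newly added arm starts with an all-zero beta vector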
        mab.add_arm(2)
        self.assertTrue(2 in mab._imp.arm_to_model.keys())
        self.assertEqual(mab._imp.arm_to_model[2].beta[0], 0)
        self.assertEqual(mab._imp.arm_to_model[2].beta[1], 0)
        self.assertEqual(mab._imp.arm_to_model[2].beta[2], 0)
        self.assertEqual(mab._imp.arm_to_model[2].beta[3], 0)
        self.assertEqual(mab._imp.arm_to_model[2].beta[4], 0)
Code Example #7
    def test_unused_arm_scaled(self):

        context_history = np.array(
            [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
             [0, 2, 1, 0, 0]],
            dtype='float64')

        scaler = StandardScaler()
        scaled_contexts = scaler.fit_transform(context_history)
        scaled_predict = scaler.transform(
            np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]], dtype='float64'))

        exp, mab = self.predict(arms=[1, 2, 3, 4],
                                decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
                                learning_policy=LearningPolicy.LinUCB(alpha=1),
                                context_history=scaled_contexts,
                                contexts=scaled_predict,
                                seed=123456,
                                num_run=1,
                                is_predict=False)

        self.assertListAlmostEqual(exp[0].values(), [
            0.702838715092242, 0.8039804426513, 0.8016765077826691,
            1.7398913429630314
        ])
        self.assertListAlmostEqual(exp[1].values(), [
            0.814935740273692, 1.09321065622604, 0.6199330260793201,
            1.8228899573337314
        ])
Code Example #8
    def test_df_list(self):

        df = pd.DataFrame({
            'decisions': [1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            'rewards': [0, 0, 1, 0, 0, 0, 0, 1, 1, 1]
        })

        arm, mab = self.predict(arms=[1, 2, 3],
                                decisions=df['decisions'],
                                rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
                                learning_policy=LearningPolicy.LinUCB(alpha=1),
                                context_history=[[0, 1, 2, 3, 5],
                                                 [1, 1, 1, 1, 1],
                                                 [0, 0, 1, 0, 0],
                                                 [0, 2, 2, 3, 5],
                                                 [1, 3, 1, 1, 1],
                                                 [0, 0, 0, 0, 0],
                                                 [0, 1, 4, 3, 5],
                                                 [0, 1, 2, 4, 5],
                                                 [1, 2, 1, 1, 3],
                                                 [0, 2, 1, 0, 0]],
                                contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                seed=123456,
                                num_run=3,
                                is_predict=True)

        self.assertEqual(len(arm), 3)
        self.assertEqual(arm, [[3, 3], [3, 3], [3, 3]])
Code Example #9
    def test_unused_arm_scaled2(self):

        context_history = np.array(
            [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
             [0, 2, 1, 0, 0]],
            dtype='float64')

        scaler = StandardScaler()
        scaled_contexts = scaler.fit_transform(context_history)
        scaled_predict = scaler.transform(
            np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]], dtype='float64'))

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=1),
            context_history=scaled_contexts,
            contexts=scaled_predict,
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertEqual(arms, [4, 4])
Code Example #10
File: test_invalid.py  Project: BigRLab/mabwiser
    def test_invalid_ridge_l2_lambda_type(self):
        with self.assertRaises(TypeError):
            self.predict(arms=[1, 2, 3],
                         decisions=[1, 1, 1],
                         rewards=[0, 0, 0],
                         learning_policy=LearningPolicy.LinUCB(alpha=1,
                                                               l2_lambda=None),
                         neighborhood_policy=NeighborhoodPolicy.KNearest(2),
                         context_history=np.array([1, 1, 1]),
                         contexts=np.array([[1, 1]]),
                         seed=123456,
                         num_run=1,
                         is_predict=True)

        with self.assertRaises(TypeError):
            self.predict(arms=[1, 2, 3],
                         decisions=[1, 1, 1],
                         rewards=[0, 0, 0],
                         learning_policy=LearningPolicy.LinTS(alpha=1,
                                                              l2_lambda=None),
                         neighborhood_policy=NeighborhoodPolicy.KNearest(2),
                         context_history=np.array([1, 1, 1]),
                         contexts=np.array([[1, 1]]),
                         seed=123456,
                         num_run=1,
                         is_predict=True)
Code Example #11
File: test_invalid.py  Project: BigRLab/mabwiser
    def test_invalid_add_arm_scaler(self):

        scaler = StandardScaler()
        arm_to_scaler = {0: deepcopy(scaler), 1: deepcopy(scaler)}
        mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler))
        with self.assertRaises(TypeError):
            mab.add_arm(2, scaler=deepcopy(scaler))
Code Example #12
    def test_fit_twice_new_features(self):

        context = np.array([[1, 0, 2, 1, 1], [3, 1, 2, 3, 4], [2, -1, 1, 0, 2],
                            [-1, 4, 2, 0, 1], [2, 2, 2, 2, 2], [3, 2, 1, 2, 3],
                            [0, 0, 0, 0, 0], [2, 1, 1, 1, 2], [3, 2, 3, 2, 3],
                            [8, 2, 3, 1, 0], [1, 2, -9, -7, 1],
                            [0, 1, 1, 1, 1]])
        rewards = np.array([3, 3, 1, 0, -1, 2, 1, 2, 1, 1, 0, 3])
        decisions = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
        arms, mab = self.predict(arms=[0, 1],
                                 decisions=decisions,
                                 rewards=rewards,
                                 learning_policy=LearningPolicy.LinUCB(
                                     alpha=1, l2_lambda=0),
                                 context_history=context,
                                 contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)

        self.assertEqual(mab._imp.num_features, 5)
        self.assertEqual(arms, [1, 1])

        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[0],
                         0.09224215,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[1],
                         -0.20569848,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[2],
                         0.13434242,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[3],
                         -0.1000045,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[4],
                         0.63726682,
                         abs_tol=0.00000001))

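        # Refitting with six-feature contexts should update num_features to 6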
        context2 = np.array([[1, 0, 2, 1, 1, 3], [3, 1, 2, 3, 4, 1],
                             [2, -1, 1, 0, 2, 2], [-1, 4, 2, 0, 1, 0],
                             [1, 2, 3, 4, 5, 1]])
        rewards2 = np.array([-1, 2, 1, 2, 0])
        decisions2 = np.array([1, 1, 1, 1, 1])
        mab.fit(decisions2, rewards2, context2)
        self.assertEqual(mab._imp.num_features, 6)
Code Example #13
    def test_invalid_lp_arg(self):
        with self.assertRaises(TypeError):
            MAB(['a', 'b'], LearningPolicy.UCB1(epsilon=2))

        with self.assertRaises(TypeError):
            MAB(['a', 'b'], LearningPolicy.EpsilonGreedy(alpha=2))

        with self.assertRaises(TypeError):
            MAB(['a', 'b'], LearningPolicy.ThompsonSampling(alpha=2))

        with self.assertRaises(TypeError):
            MAB(['a', 'b'], LearningPolicy.Softmax(alpha=2))

        with self.assertRaises(TypeError):
            MAB(['a', 'b'], LearningPolicy.LinUCB(tau=1))
Code Example #14
    def test_linucb(self):
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age':
            [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [
                0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17, 0.33, 0.65, 0.56,
                0.22, 0.19, 0.11, 0.83
            ],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]
        })

        # Test data for the new prediction
        test_df = pd.DataFrame({
            'age': [37, 52],
            'click_rate': [0.5, 0.6],
            'subscriber': [0, 1]
        })
        test_df_revenue = pd.Series([7, 13])

        # Scale the data
        scaler = StandardScaler()
        train = scaler.fit_transform(
            np.asarray(train_df[['age', 'click_rate', 'subscriber']],
                       dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        arms, mab = self.predict(
            arms=[1, 2, 3, 4, 5],
            decisions=train_df['ad'],
            rewards=train_df['revenues'],
            learning_policy=LearningPolicy.LinUCB(alpha=1.25),
            context_history=train,
            contexts=test,
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertEqual(arms, [5, 2])

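        # Update the model online with the observed revenues, then add a new arm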
        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)

        mab.add_arm(6)
        self.assertTrue(6 in mab.arms)
        self.assertTrue(6 in mab._imp.arm_to_expectation.keys())
Code Example #15
    def test_unused_arm2(self):

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=1),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertEqual(arms, [4, 4])
Code Example #16
    def test_partial_fit(self):

        arm, mab = self.predict(arms=[1, 2, 3, 4],
                                decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
                                learning_policy=LearningPolicy.LinUCB(alpha=1),
                                context_history=[[0, 1, 2, 3, 5],
                                                 [1, 1, 1, 1, 1],
                                                 [0, 0, 1, 0, 0],
                                                 [0, 2, 2, 3, 5],
                                                 [1, 3, 1, 1, 1],
                                                 [0, 0, 0, 0, 0],
                                                 [0, 1, 4, 3, 5],
                                                 [0, 1, 2, 4, 5],
                                                 [1, 2, 1, 1, 3],
                                                 [0, 2, 1, 0, 0]],
                                contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                seed=123456,
                                num_run=1,
                                is_predict=True)

        self.assertEqual(arm, [4, 4])

        b_1 = mab._imp.arm_to_model[1].beta
        self.assertTrue(math.isclose(-0.0825688, b_1[0], abs_tol=0.00001))

        b_3 = mab._imp.arm_to_model[3].beta
        self.assertTrue(math.isclose(0.023696, b_3[0], abs_tol=0.00001))

        self.assertTrue(4 in mab._imp.arm_to_model.keys())

        # Fit again
        decisions2 = [1, 3, 4]
        rewards2 = [0, 1, 1]
        context_history2 = [[0, 1, 1, 1, 1], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0]]
        mab.partial_fit(decisions2, rewards2, context_history2)

        b_1 = mab._imp.arm_to_model[1].beta
        self.assertTrue(math.isclose(-0.05142857, b_1[0], abs_tol=0.00001))

        b_3 = mab._imp.arm_to_model[3].beta
        self.assertTrue(math.isclose(b_3[0], 0.22099152, abs_tol=0.00001))

        b_4 = mab._imp.arm_to_model[4].beta
        self.assertEqual(b_4[0], 0)
Code Example #17
    def test_linucb_t2(self):

        arm, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 3, 2, 2, 3, 1, 3, 1],
            rewards=[0, 1, 1, 0, 1, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=1.5),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=71,
            num_run=4,
            is_predict=True)

        self.assertEqual(len(arm), 4)
        self.assertEqual(arm, [[2, 2], [2, 2], [2, 2], [2, 2]])
Code Example #18
    def test_np(self):

        arm, mab = self.predict(
            arms=[1, 2, 3],
            decisions=np.asarray([1, 1, 1, 2, 2, 3, 3, 3, 3, 3]),
            rewards=np.asarray([0, 0, 1, 0, 0, 0, 0, 1, 1, 1]),
            learning_policy=LearningPolicy.LinUCB(alpha=1),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=3,
            is_predict=True)

        self.assertEqual(len(arm), 3)
        self.assertEqual(arm, [[3, 3], [3, 3], [3, 3]])
Code Example #19
    def test_linucb_t8(self):

        arm, mab = self.predict(
            arms=['a', 'b', 'c'],
            decisions=['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a'],
            rewards=[-1.25, 0.7, 12, 10, 12, 9.2, -1, -10, 4, 0],
            learning_policy=LearningPolicy.LinUCB(alpha=0.5),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=9,
            num_run=4,
            is_predict=True)

        self.assertEqual(len(arm), 4)
        self.assertEqual(arm, [['c', 'c'], ['c', 'c'], ['c', 'c'], ['c', 'c']])
Code Example #20
    def test_alpha0_nearest5(self):

        arm, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 1],
            rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=0),
            neighborhood_policy=NeighborhoodPolicy.KNearest(k=5),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=3,
            is_predict=True)

        self.assertEqual(len(arm), 3)
        self.assertEqual(arm, [[3, 3], [3, 3], [3, 3]])
Code Example #21
    def test_alpha0_expectations(self):

        exps, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 1],
            rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=0),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=False)

        self.assertListAlmostEqual(
            exps[0].values(), [-0.018378378378378413, 0.0, 0.9966292134831471])
        self.assertListAlmostEqual(
            exps[1].values(), [0.14054054054054055, 0.0, 0.43258426966292074])
Code Example #22
    def test_linucb_t4(self):

        arm, mab = self.predict(
            arms=[1, 2, 4],
            decisions=[1, 1, 4, 4, 2, 2, 1, 1, 4, 2, 1, 4, 1, 2, 4, 1],
            rewards=[7, 9, 10, 20, 2, 5, 8, 15, 17, 11, 0, 5, 2, 9, 3, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=2),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0], [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                             [1, 2, 1, 1, 3], [0, 2, 1, 0, 0], [0, 2, 2, 3, 5],
                             [1, 3, 1, 1, 1]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=23,
            num_run=4,
            is_predict=True)

        self.assertEqual(len(arm), 4)
        self.assertEqual(arm, [[4, 4], [4, 4], [4, 4], [4, 4]])
Code Example #23
    def test_linucb_knearest(self):

        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age':
            [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [
                0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17, 0.33, 0.65, 0.56,
                0.22, 0.19, 0.11, 0.83
            ],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]
        })

        # Test data for the new prediction
        test_df = pd.DataFrame({
            'age': [37, 52],
            'click_rate': [0.5, 0.6],
            'subscriber': [0, 1]
        })

        # Scale the data
        scaler = StandardScaler()
        train = scaler.fit_transform(
            np.asarray(train_df[['age', 'click_rate', 'subscriber']],
                       dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        arms, mab = self.predict(
            arms=[1, 2, 3, 4, 5],
            decisions=train_df['ad'],
            rewards=train_df['revenues'],
            learning_policy=LearningPolicy.LinUCB(alpha=1.25),
            neighborhood_policy=NeighborhoodPolicy.KNearest(k=4),
            context_history=train,
            contexts=test,
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertEqual(arms, [1, 2])
Code Example #24
    def test_alpha1_expectations(self):

        exps, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=1),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=False)

        self.assertListAlmostEqual(
            exps[0].values(),
            [0.9790312458954391, 1.1617706239438832, 1.4247056229871702])
        self.assertListAlmostEqual(
            exps[1].values(),
            [0.8896475809353053, 0.923364043088837, 1.457085577251709])
Code Example #25
    def test_l2_low(self):

        context = np.array([[1, 1, 0, 0, 1], [0, 1, 2, 9, 4], [2, 3, 1, 0, 2]])
        rewards = np.array([3, 2, 1])
        decisions = np.array([1, 1, 1])

        arms, mab = self.predict(arms=[0, 1],
                                 decisions=decisions,
                                 rewards=rewards,
                                 learning_policy=LearningPolicy.LinUCB(
                                     alpha=1, l2_lambda=0.1),
                                 context_history=context,
                                 contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)

        self.assertEqual(mab._imp.num_features, 5)
        self.assertEqual(arms, [1, 1])
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[0],
                         1.59499705,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[1],
                         -0.91856183,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[2],
                         -2.49775977,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[3],
                         0.14219195,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[4],
                         1.65819347,
                         abs_tol=0.00000001))
Code Example #26
    def test_l2_high(self):

        context = np.array([[1, 1, 0, 0, 1], [0, 1, 2, 9, 4], [2, 3, 1, 0, 2]])
        rewards = np.array([3, 2, 1])
        decisions = np.array([1, 1, 1])
        arms, mab = self.predict(arms=[0, 1],
                                 decisions=decisions,
                                 rewards=rewards,
                                 learning_policy=LearningPolicy.LinUCB(
                                     alpha=1, l2_lambda=10),
                                 context_history=context,
                                 contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)

        self.assertEqual(mab._imp.num_features, 5)
        self.assertEqual(arms, [0, 0])
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[0],
                         0.18310155,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[1],
                         0.16372811,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[2],
                         -0.00889076,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[3],
                         0.09434416,
                         abs_tol=0.00000001))
        self.assertTrue(
            math.isclose(mab._imp.arm_to_model[1].beta[4],
                         0.22503229,
                         abs_tol=0.00000001))
Code Example #27
    def test_linucb_t9(self):

        # Dates to test
        a = datetime.datetime(2018, 1, 1)
        b = datetime.datetime(2017, 7, 31)
        c = datetime.datetime(2018, 9, 15)

        arm, mab = self.predict(
            arms=[a, b, c],
            decisions=[a, b, c, a, b, c, a, b, c, a],
            rewards=[1.25, 0.7, 12, 10, 1.43, 0.2, -1, -10, 4, 0],
            learning_policy=LearningPolicy.LinUCB(alpha=0.25),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=4,
            is_predict=True)

        self.assertEqual(len(arm), 4)
        self.assertEqual(arm, [[c, c], [c, c], [c, c], [c, c]])
Code Example #28
    def test_linucb_t10(self):

        # Dates to test
        a = datetime.datetime(2018, 1, 1)
        b = datetime.datetime(2017, 7, 31)
        c = datetime.datetime(2018, 9, 15)

        arm, mab = self.predict(
            arms=[a, b, c],
            decisions=[a, b, c, a, b, c, a, b, c, a, b, b, a],
            rewards=[7, 12, 1, -10, 5, 1, 2, 9, 3, 3, 6, 7, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=1),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0], [0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                             [0, 0, 1, 0, 0]],
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=7,
            num_run=4,
            is_predict=True)

        self.assertEqual(len(arm), 4)
        self.assertEqual(arm, [[b, b], [b, b], [b, b], [b, b]])
Code Example #29
File: test_parallel.py  Project: ccoffrin/mabwiser
    def test_linUCB_expectations(self):

        rng = np.random.RandomState(seed=111)
        contexts = rng.randint(0, 5, (8, 5))
        expected_pred = [
            [1.1923304881612438, 0.386812974778054, 2.036795075137375],
            [1.1383448695075555, 0.16604895162348998, 0.7454336659862624],
            [0.39044990078495967, 0.32572728761335573, 1.0533787080477959],
            [-0.9557496857893883, 0.4393900133310143, 1.4663248923093817],
            [-0.4630963822269796, 0.44282983853389307, 1.4430098512988918],
            [0.26667599463140623, 0.34807480426506293, 1.008245109800643],
            [1.3255310649960248, 0.43761043197507354, 0.9787023941693738],
            [0.33267910305673676, 0.29690114350965546, 1.460951676645638]
        ]

        exps, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 1],
            rewards=[0, 0, 1, 0, 0, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=0.1),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=contexts,
            seed=123456,
            num_run=1,
            is_predict=False,
            n_jobs=1)

        for i in range(len(expected_pred)):
            self.assertListAlmostEqual(exps[i].values(), expected_pred[i])

        exps, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 1],
            rewards=[0, 0, 1, 0, 0, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=0.1),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=contexts,
            seed=123456,
            num_run=1,
            is_predict=False,
            n_jobs=2)

        for i in range(len(expected_pred)):
            self.assertListAlmostEqual(exps[i].values(), expected_pred[i])

        exps, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 1],
            rewards=[0, 0, 1, 0, 0, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=0.1),
            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                             [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                             [0, 2, 1, 0, 0]],
            contexts=contexts,
            seed=123456,
            num_run=1,
            is_predict=False,
            n_jobs=-1)

        for i in range(len(expected_pred)):
            self.assertListAlmostEqual(exps[i].values(), expected_pred[i])
Code Example #30
import unittest
from typing import List, Optional, Union

import numpy as np
import pandas as pd

from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy
# Arm and Num are assumed to be the type aliases provided by mabwiser.utils
from mabwiser.utils import Arm, Num


class BaseTest(unittest.TestCase):

    # A list of valid learning policies
    lps = [
        LearningPolicy.EpsilonGreedy(),
        LearningPolicy.EpsilonGreedy(epsilon=0),
        LearningPolicy.EpsilonGreedy(epsilon=0.0),
        LearningPolicy.EpsilonGreedy(epsilon=0.5),
        LearningPolicy.EpsilonGreedy(epsilon=1),
        LearningPolicy.EpsilonGreedy(epsilon=1.0),
        LearningPolicy.Random(),
        LearningPolicy.Softmax(),
        LearningPolicy.Softmax(tau=0.1),
        LearningPolicy.Softmax(tau=0.5),
        LearningPolicy.Softmax(tau=1),
        LearningPolicy.Softmax(tau=1.0),
        LearningPolicy.Softmax(tau=5.0),
        LearningPolicy.ThompsonSampling(),
        LearningPolicy.UCB1(),
        LearningPolicy.UCB1(alpha=0),
        LearningPolicy.UCB1(alpha=0.0),
        LearningPolicy.UCB1(alpha=0.5),
        LearningPolicy.UCB1(alpha=1),
        LearningPolicy.UCB1(alpha=1.0),
        LearningPolicy.UCB1(alpha=5)
    ]

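    # A list of valid parametric (contextual) learning policies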
    para_lps = [
        LearningPolicy.LinTS(alpha=0.00001, l2_lambda=1),
        LearningPolicy.LinTS(alpha=0.5, l2_lambda=1),
        LearningPolicy.LinTS(alpha=1, l2_lambda=1),
        LearningPolicy.LinTS(alpha=0.00001, l2_lambda=0.5),
        LearningPolicy.LinTS(alpha=0.5, l2_lambda=0.5),
        LearningPolicy.LinTS(alpha=1, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=0, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=0.5, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=1, l2_lambda=1),
        LearningPolicy.LinUCB(alpha=0, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=0.5, l2_lambda=0.5),
        LearningPolicy.LinUCB(alpha=1, l2_lambda=0.5)
    ]

    # A list of valid context policies
    nps = [
        NeighborhoodPolicy.KNearest(),
        NeighborhoodPolicy.KNearest(k=1),
        NeighborhoodPolicy.KNearest(k=3),
        NeighborhoodPolicy.Radius(),
        NeighborhoodPolicy.Radius(2.5),
        NeighborhoodPolicy.Radius(5)
    ]

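    # A list of valid clustering policies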
    cps = [
        NeighborhoodPolicy.Clusters(),
        NeighborhoodPolicy.Clusters(n_clusters=3),
        NeighborhoodPolicy.Clusters(is_minibatch=True),
        NeighborhoodPolicy.Clusters(n_clusters=3, is_minibatch=True)
    ]

    @staticmethod
    def predict(
        arms: List[Arm],
        decisions: Union[List, np.ndarray, pd.Series],
        rewards: Union[List, np.ndarray, pd.Series],
        learning_policy: Union[LearningPolicy.EpsilonGreedy,
                               LearningPolicy.Random, LearningPolicy.Softmax,
                               LearningPolicy.ThompsonSampling,
                               LearningPolicy.UCB1, LearningPolicy.LinTS,
                               LearningPolicy.LinUCB],
        neighborhood_policy: Union[None, NeighborhoodPolicy.Clusters,
                                   NeighborhoodPolicy.Radius,
                                   NeighborhoodPolicy.KNearest] = None,
        context_history: Union[None, List[Num], List[List[Num]], np.ndarray,
                               pd.DataFrame, pd.Series] = None,
        contexts: Union[None, List[Num], List[List[Num]], np.ndarray,
                        pd.DataFrame, pd.Series] = None,
        seed: Optional[int] = 123456,
        num_run: Optional[int] = 1,
        is_predict: Optional[bool] = True,
        n_jobs: Optional[int] = 1,
        backend: Optional[str] = None
    ) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
        """Sets up a MAB model and runs the given configuration.

        Returns a list of predictions (or a single prediction) and the mab instance when is_predict is True.
        Returns a list of expectations (or a single expectation) and the mab instance when is_predict is False.

        Calls the predict or predict_expectations method num_run times.
        """

        # Model
        mab = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs,
                  backend)

        # Train
        mab.fit(decisions, rewards, context_history)

        # Test
        if is_predict:

            # Return: prediction(s) and the MAB instance
            predictions = [mab.predict(contexts) for _ in range(num_run)]
            return predictions[0] if num_run == 1 else predictions, mab

        else:

            # Return: expectation(s) and the MAB instance
            expectations = [
                mab.predict_expectations(contexts) for _ in range(num_run)
            ]
            return expectations[0] if num_run == 1 else expectations, mab

    def assertListAlmostEqual(self, list1, list2):
        """
        Asserts that the floating-point values in the given lists are (almost) equal to each other.
        """
        if not isinstance(list1, list):
            list1 = list(list1)

        if not isinstance(list2, list):
            list2 = list(list2)

        self.assertEqual(len(list1), len(list2))

        for index, val in enumerate(list1):
            self.assertAlmostEqual(val, list2[index])
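
All of the snippets above go through the BaseTest.predict helper. For orientation, the following is a minimal standalone sketch of the MABWiser LinUCB workflow those tests exercise (fit, predict, partial_fit, add_arm). The arm labels, rewards, and contexts here are made up for illustration and do not come from any test above.

import numpy as np

from mabwiser.mab import MAB, LearningPolicy

# Illustrative training data: one decision, reward, and 5-feature context per round
decisions = [1, 1, 2, 2, 3, 3]
rewards = [0, 1, 0, 0, 1, 1]
contexts = np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                     [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0]])

# Train a LinUCB bandit and predict arms for two new contexts
mab = MAB(arms=[1, 2, 3], learning_policy=LearningPolicy.LinUCB(alpha=1),
          seed=123456)
mab.fit(decisions, rewards, contexts)
predicted_arms = mab.predict(np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]))

# Online update with newly observed feedback, then register a new arm
mab.partial_fit([1, 2], [1, 0], np.array([[0, 1, 1, 1, 1], [1, 1, 1, 1, 1]]))
mab.add_arm(4)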