Code Example #1
    def test_numpy_rewards(self):

        exp, mab = self.predict(
            arms=["one", "two"],
            decisions=["one", "one", "one", "two", "two", "two"],
            rewards=np.array([1, 1, 1, 0, 1, 1]),
            learning_policy=LearningPolicy.Popularity(),
            seed=123456,
            num_run=1,
            is_predict=False)

        # Fitted expectations: the normalized mean rewards sum to 1
        self.assertAlmostEqual(1.0, exp["one"] + exp["two"])
        self.assertAlmostEqual(exp["one"], 0.6)
        self.assertAlmostEqual(exp["two"], 0.4)
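The 0.6 / 0.4 split asserted above follows from how Popularity normalizes per-arm mean rewards into a probability distribution. A minimal sketch of that arithmetic in plain Python, independent of mabwiser internals:

mean_one = (1 + 1 + 1) / 3  # arm "one" always paid out: mean 1.0
mean_two = (0 + 1 + 1) / 3  # arm "two" paid out twice in three: mean ~0.667

# Each arm's expectation is its share of the total mean reward.
total = mean_one + mean_two
assert abs(mean_one / total - 0.6) < 1e-12
assert abs(mean_two / total - 0.4) < 1e-12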
Code Example #2
    def test_3arm_equal_prob(self):
        arm, mab = self.predict(arms=[1, 2, 3],
                                decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3],
                                rewards=[1, 1, 1, 1, 1, 1, 1, 1, 1],
                                learning_policy=LearningPolicy.Popularity(),
                                seed=123456,
                                num_run=5,
                                is_predict=True)

        self.assertEqual(arm, [1, 3, 1, 3, 2])

        exp, mab = self.predict(arms=[1, 2, 3],
                                decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3],
                                rewards=[1, 1, 1, 1, 1, 1, 1, 1, 1],
                                learning_policy=LearningPolicy.Popularity(),
                                seed=123456,
                                num_run=1,
                                is_predict=False)

        # Check that normalized probabilities are 1/3 each, and sum up to 1.0
        self.assertAlmostEqual(1.0, exp[1] + exp[2] + exp[3])
        self.assertAlmostEqual(exp[1], exp[2])
        self.assertAlmostEqual(exp[2], exp[3])
        self.assertAlmostEqual(exp[3], exp[1])
Code Example #3
    def test_unused_arm(self):

        exp, mab = self.predict(arms=[1, 2, 3],
                                decisions=[1, 1, 1, 2, 2, 2],
                                rewards=[1, 1, 1, 0, 1, 1],
                                learning_policy=LearningPolicy.Popularity(),
                                seed=123456,
                                num_run=1,
                                is_predict=False)

        # Fitted expectations: the unused arm stays at 0 and the rest normalize to 1
        self.assertAlmostEqual(1.0, exp[1] + exp[2] + exp[3])
        self.assertAlmostEqual(exp[1], 0.6)
        self.assertAlmostEqual(exp[2], 0.4)
        self.assertAlmostEqual(exp[3], 0.0)
Code Example #4
    def test_tau1_expectations(self):

        arm, mab = self.predict(arms=[1, 2, 3],
                                decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                                learning_policy=LearningPolicy.Softmax(tau=1),
                                seed=123456,
                                num_run=1,
                                is_predict=False)

        self.assertDictEqual(arm, {
            1: 0.4083425853583662,
            2: 0.20965007375301267,
            3: 0.3820073408886212
        })
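The three expectations above can be re-derived as a softmax over the per-arm mean rewards at tau=1. A sketch in plain Python; subtracting the max mean is the usual numerical stabilization and cancels after normalization (whether mabwiser applies that exact shift internally is an assumption, but it does not change the result):

import math

means = {1: 2 / 3, 2: 0 / 2, 3: 3 / 5}  # per-arm mean rewards from the data above
tau = 1.0

m = max(means.values())
weights = {arm: math.exp((mu - m) / tau) for arm, mu in means.items()}
total = sum(weights.values())
probs = {arm: w / total for arm, w in weights.items()}

assert abs(probs[1] - 0.4083425853583662) < 1e-9
assert abs(probs[2] - 0.20965007375301267) < 1e-9
assert abs(probs[3] - 0.3820073408886212) < 1e-9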
Code Example #5
    def test_ucb_t5(self):

        arm, mab = self.predict(arms=['one', 'two', 'three'],
                                decisions=[
                                    'one', 'one', 'one', 'three', 'two', 'two',
                                    'three', 'one', 'three', 'two'
                                ],
                                rewards=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
                                learning_policy=LearningPolicy.UCB1(alpha=1),
                                seed=23,
                                num_run=4,
                                is_predict=True)

        self.assertEqual(len(arm), 4)
        self.assertEqual(arm, ['three', 'three', 'three', 'three'])
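Why 'three' wins every run: UCB1 scores each arm by its mean reward plus an exploration bonus that shrinks as the arm is pulled more often. A sketch using the classic index, mean + alpha * sqrt(2 ln t / n); that mabwiser scales the bonus by alpha in exactly this way is an assumption here:

import math

# (pull count, mean reward) per arm, tallied from the data above
stats = {'one': (4, 3 / 4), 'two': (3, 1 / 3), 'three': (3, 2 / 3)}
t = 10  # total decisions observed
alpha = 1

index = {arm: mu + alpha * math.sqrt(2 * math.log(t) / n)
         for arm, (n, mu) in stats.items()}

# 'three' pairs a decent mean with a small pull count, so its index tops
# 'one' (higher mean, but pulled more often): ~1.91 vs ~1.82.
assert max(index, key=index.get) == 'three'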
Code Example #6
File: test_greedy.py Project: pjackson31/mabwiser
    def test_greedy_t6(self):

        arms, mab = self.predict(
            arms=['one', 'two', 'three'],
            decisions=[
                'one', 'one', 'one', 'three', 'two', 'two', 'three', 'one',
                'three', 'two'
            ],
            rewards=[2, 7, 7, 9, 1, 3, 1, 2, 6, 4],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
            seed=17,
            num_run=4,
            is_predict=True)

        self.assertEqual(arms, ['three', 'one', 'three', 'one'])
Code Example #7
File: test_greedy.py Project: pjackson31/mabwiser
    def test_greedy_t5(self):

        arms, mab = self.predict(
            arms=['one', 'two', 'three'],
            decisions=[
                'one', 'one', 'one', 'three', 'two', 'two', 'three', 'one',
                'three', 'two'
            ],
            rewards=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.25),
            seed=123456,
            num_run=4,
            is_predict=True)

        self.assertEqual(arms, ['three', 'one', 'one', 'one'])
Code Example #8
    def test_thompson_t5(self):

        arms, mab = self.predict(
            arms=['one', 'two', 'three'],
            decisions=[
                'one', 'one', 'one', 'three', 'two', 'two', 'three', 'one',
                'three', 'two'
            ],
            rewards=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
            learning_policy=LearningPolicy.ThompsonSampling(),
            seed=123456,
            num_run=4,
            is_predict=True)

        self.assertEqual(arms, ['one', 'one', 'one', 'three'])
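Thompson sampling explains why the four runs split between 'one' and 'three': each prediction draws every arm's expectation from a Beta posterior over its binary rewards and plays the argmax. A library-independent sketch of the mechanism (it reproduces the behavior, not the literal seed-specific sequence):

import random

random.seed(123456)

# (successes, failures) per arm from the binary rewards above
arm_stats = {'one': (3, 1), 'two': (1, 2), 'three': (2, 1)}

def thompson_pick():
    # Sample from Beta(successes + 1, failures + 1), i.e. a uniform prior.
    draws = {arm: random.betavariate(s + 1, f + 1)
             for arm, (s, f) in arm_stats.items()}
    return max(draws, key=draws.get)

# 'one' (mean 0.75) wins most draws, 'three' (mean ~0.67) takes some.
print([thompson_pick() for _ in range(4)])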
Code Example #9
File: test_greedy.py Project: pjackson31/mabwiser
    def test_add_arm(self):

        arms, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 3, 2, 2, 3, 1, 3],
            rewards=[0, 1, 1, 0, 1, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.25),
            seed=123456,
            num_run=4,
            is_predict=True)
        mab.add_arm(4)
        self.assertTrue(4 in mab.arms)
        self.assertTrue(4 in mab._imp.arms)
        self.assertTrue(4 in mab._imp.arm_to_expectation.keys())
        self.assertTrue(mab._imp.arm_to_sum[4] == 0)
Code Example #10
File: test_greedy.py Project: pjackson31/mabwiser
    def test_seed_epsilon50(self):

        arms, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3],
            rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
            seed=123456,
            num_run=5,
            is_predict=True)

        self.assertListEqual(arms, [3, 3, 3, 3, 3])

        # Change the seed and assert a different result
        arms, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3],
            rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.5),
            seed=123,
            num_run=5,
            is_predict=True)

        self.assertListEqual(arms, [3, 1, 3, 3, 2])
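The seed dependence above is inherent to epsilon-greedy: with probability epsilon the policy explores a uniformly random arm, otherwise it exploits the arm with the best empirical mean. A minimal sketch of the decision rule (mabwiser's RNG consumption differs, so the exact arm sequence per seed will not match):

import random

def epsilon_greedy_pick(arm_means, epsilon, rng):
    # Explore with probability epsilon, otherwise exploit the best mean.
    if rng.random() < epsilon:
        return rng.choice(list(arm_means))
    return max(arm_means, key=arm_means.get)

means = {1: 0.0, 2: 0.0, 3: 1.0}  # per-arm means from the data above
for seed in (123456, 123):
    rng = random.Random(seed)
    print(seed, [epsilon_greedy_pick(means, 0.5, rng) for _ in range(5)])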
Code Example #11
    def test_softmax_t5(self):

        arm, mab = self.predict(
            arms=['one', 'two', 'three'],
            decisions=[
                'one', 'one', 'one', 'three', 'two', 'two', 'three', 'one',
                'three', 'two'
            ],
            rewards=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
            learning_policy=LearningPolicy.Softmax(tau=1.5),
            seed=123456,
            num_run=4,
            is_predict=True)

        self.assertEqual(arm, ['one', 'three', 'one', 'three'])
Code Example #12
    def test_approximate(self):
        train_df = pd.DataFrame({
            'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
            'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
            'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
            'click_rate': [
                0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17, 0.33, 0.65, 0.56,
                0.22, 0.19, 0.11, 0.83
            ],
            'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]
        })

        # Test data for the new prediction
        test_df = pd.DataFrame({
            'age': [37, 52],
            'click_rate': [0.5, 0.6],
            'subscriber': [0, 1]
        })
        test_df_revenue = pd.Series([7, 13])

        # Scale the data
        scaler = StandardScaler()
        train = scaler.fit_transform(
            np.asarray(train_df[['age', 'click_rate', 'subscriber']],
                       dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        arms, mab = self.predict(
            arms=[1, 2, 3, 4, 5],
            decisions=train_df['ad'],
            rewards=train_df['revenues'],
            learning_policy=LearningPolicy.UCB1(alpha=1.25),
            neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_tables=5,
                                                              n_dimensions=5),
            context_history=train,
            contexts=test,
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertEqual(arms, [1, 1])

        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)

        mab.add_arm(6)
        self.assertTrue(6 in mab.arms)
        self.assertTrue(6 in mab._imp.arm_to_expectation.keys())
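LSHNearest restricts each prediction to training contexts that hash into the same buckets as the incoming context, which is why the features are scaled first: random-hyperplane hashes are direction-sensitive. A generic sketch of that hashing idea; the exact scheme inside mabwiser (and reading n_dimensions as the number of sign bits per table) is an assumption here:

import numpy as np

rng = np.random.default_rng(123456)

def lsh_signature(x, planes):
    # One sign bit per random hyperplane: points pointing in similar
    # directions tend to land in the same bucket.
    return tuple((planes @ x > 0).astype(int))

planes = rng.standard_normal((5, 3))  # 5 hash bits over 3 scaled features
print(lsh_signature(np.array([0.2, -0.4, 0.9]), planes))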
Code Example #13
    def test_ucb_t6(self):

        arm, mab = self.predict(
            arms=['one', 'two', 'three'],
            decisions=[
                'one', 'one', 'one', 'three', 'two', 'two', 'three', 'one',
                'three', 'two'
            ],
            rewards=[2, 7, 7, 9, 1, 3, 1, 2, 6, 4],
            learning_policy=LearningPolicy.UCB1(alpha=1.25),
            seed=17,
            num_run=4,
            is_predict=True)

        self.assertEqual(len(arm), 4)
        self.assertEqual(arm, ['three', 'three', 'three', 'three'])
Code Example #14
    def test_softmax_t9(self):

        # Dates to test
        a = datetime.datetime(2018, 1, 1)
        b = datetime.datetime(2017, 7, 31)
        c = datetime.datetime(2018, 9, 15)

        arm, mab = self.predict(arms=[a, b, c],
                                decisions=[a, b, c, a, b, c, a, b, c, a],
                                rewards=[1.25, 0.7, 12, 10, 1.43, 0.2, -1, -10, 4, 0],
                                learning_policy=LearningPolicy.Softmax(tau=1.25),
                                seed=123456,
                                num_run=4,
                                is_predict=True)

        self.assertEqual(arm, [a, c, a, c])
Code Example #15
    def test_add_arm(self):

        arm, mab = self.predict(arms=[1, 2, 3, 4],
                                decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                                learning_policy=LearningPolicy.Softmax(tau=1),
                                seed=123456,
                                num_run=1,
                                is_predict=False)

        mab.add_arm(5)
        self.assertTrue(5 in mab.arms)
        self.assertTrue(5 in mab._imp.arms)
        self.assertTrue(5 in mab._imp.arm_to_expectation.keys())
        self.assertTrue(mab._imp.arm_to_mean[5] == 0)
        self.assertTrue(mab._imp.arm_to_expectation[4] == mab._imp.arm_to_expectation[5])
Code Example #16
File: test_greedy.py Project: pjackson31/mabwiser
    def test_greedy_t10(self):

        # Dates for testing
        a = datetime.datetime(2018, 1, 1)
        b = datetime.datetime(2017, 7, 31)
        c = datetime.datetime(2018, 9, 15)

        arms, mab = self.predict(
            arms=[a, b, c],
            decisions=[a, b, c, a, b, c, a, b, c, a, b, b],
            rewards=[7, 12, 1, -10, 5, 1, 2, 9, 3, 3, 6, 7],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.33),
            seed=7,
            num_run=4,
            is_predict=True)
        self.assertEqual(arms, [b, a, a, c])
Code Example #17
    def test_softmax_t10(self):

        # Dates to test
        a = datetime.datetime(2018, 1, 1)
        b = datetime.datetime(2017, 7, 31)
        c = datetime.datetime(2018, 9, 15)

        arm, mab = self.predict(arms=[a, b, c],
                                decisions=[a, b, c, a, b, c, a, b, c, a, b, b],
                                rewards=[7, 12, 1, -10, 5, 1, 2, 9, 3, 3, 6, 7],
                                learning_policy=LearningPolicy.Softmax(tau=0.33),
                                seed=7,
                                num_run=5,
                                is_predict=True)

        self.assertEqual(arm, [b, b, b, b, c])
Code Example #18
    def test_ts_series(self):

        df = pd.DataFrame({
            'layouts': [1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1],
            'revenues': [10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10]
        })

        arm, mab = self.predict(
            arms=[1, 2],
            decisions=df['layouts'],
            rewards=df['revenues'],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15),
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertEqual(arm, 1)
Code Example #19
    def test_thompson_df_list(self):

        df = pd.DataFrame({
            "decisions": [1, 1, 1, 2, 2, 2, 3, 3, 3],
            "rewards": [0, 0, 0, 0, 0, 0, 1, 1, 1]
        })

        arms, mab = self.predict(
            arms=[1, 2, 3],
            decisions=df["decisions"],
            rewards=[0, 0, 1, 0, 1, 0, 1, 1, 1],
            learning_policy=LearningPolicy.ThompsonSampling(),
            seed=123456,
            num_run=4,
            is_predict=True)

        self.assertEqual(arms, [2, 3, 3, 3])
Code Example #20
    def test_thompson_t4(self):

        dec_to_threshold = {1: 5, 2: 5, 4: 5}

        def binarize(dec, reward):
            return reward >= dec_to_threshold[dec]

        arms, mab = self.predict(
            arms=[1, 2, 4],
            decisions=[1, 1, 4, 4, 2, 2, 1, 1, 4, 2, 1, 4, 1, 2, 4],
            rewards=[7, 9, 10, 20, 2, 5, 8, 15, 17, 11, 0, 5, 2, 9, 3],
            learning_policy=LearningPolicy.ThompsonSampling(binarize),
            seed=23,
            num_run=4,
            is_predict=True)

        self.assertEqual(arms, [2, 2, 4, 2])
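The binarize function above is what lets Thompson sampling consume continuous rewards: each (decision, reward) pair collapses to a success or failure against the arm's threshold. Tallying the data above with the shared threshold of 5 shows what the Beta posteriors actually see:

decisions = [1, 1, 4, 4, 2, 2, 1, 1, 4, 2, 1, 4, 1, 2, 4]
rewards = [7, 9, 10, 20, 2, 5, 8, 15, 17, 11, 0, 5, 2, 9, 3]

counts = {1: [0, 0], 2: [0, 0], 4: [0, 0]}  # [successes, failures] per arm
for dec, reward in zip(decisions, rewards):
    counts[dec][0 if reward >= 5 else 1] += 1

# Arms 2 and 4 end up with the best success rates (3/4 and 4/5), which is
# why the sampled predictions alternate between them.
print(counts)  # {1: [4, 2], 2: [3, 1], 4: [4, 1]}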
Code Example #21
File: test_greedy.py Project: pjackson31/mabwiser
    def test_greedy_t9(self):

        # Dates for testing
        a = datetime.datetime(2018, 1, 1)
        b = datetime.datetime(2017, 7, 31)
        c = datetime.datetime(2018, 9, 15)

        arms, mab = self.predict(
            arms=[a, b, c],
            decisions=[a, b, c, a, b, c, a, b, c, a],
            rewards=[1.25, 0.7, 12, 10, 1.43, 0.2, -1, -10, 4, 0],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.25),
            seed=123456,
            num_run=4,
            is_predict=True)

        self.assertEqual(arms, [c, c, c, c])
Code Example #22
    def test_thompson_t8(self):

        dec_to_threshold = {'a': 1, 'b': 1, 'c': 1}

        def binarize(dec, reward):
            return reward >= dec_to_threshold[dec]

        arms, mab = self.predict(
            arms=['a', 'b', 'c'],
            decisions=['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a'],
            rewards=[-1.25, 0.7, 12, 10, 12, 9.2, -1, -10, 4, 0],
            learning_policy=LearningPolicy.ThompsonSampling(binarize),
            seed=9,
            num_run=5,
            is_predict=True)

        self.assertEqual(arms, ['c', 'c', 'c', 'c', 'c'])
Code Example #23
    def test_add_arm_new_function(self):
        def bin1(dec, reward):
            if dec == 0:
                if reward > 50:
                    return 1
                else:
                    return 0
            elif dec == 1:
                if reward < 20:
                    return 1
                else:
                    return 0

        arm, mab = self.predict(
            arms=[0, 1],
            decisions=[1, 0, 1, 1, 0],
            rewards=[10, 4, 3, 70, 6],
            learning_policy=LearningPolicy.ThompsonSampling(bin1),
            seed=123456,
            num_run=1,
            is_predict=True)

        self.assertIs(mab._imp.binarizer, bin1)

        def bin2(dec, reward):
            if dec == 0:
                if reward > 50:
                    return 1
                else:
                    return 0
            elif dec == 1:
                if reward < 20:
                    return 1
                else:
                    return 0
            elif dec == 2:
                if reward >= 1:
                    return 1
                else:
                    return 0

        mab.add_arm(2, bin2)

        self.assertTrue(mab._imp.arm_to_fail_count[2] == 1)
        self.assertTrue(mab._imp.arm_to_success_count[2] == 1)
        self.assertIs(mab._imp.binarizer, bin2)
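The two count assertions above reflect that a freshly added arm starts from one pseudo-success and one pseudo-failure, i.e. the uniform Beta(1, 1) prior, so its expectation draws are uniform on [0, 1] until real feedback arrives. A one-line illustration (plain Python, not the mabwiser API):

import random

random.seed(0)
draw = random.betavariate(1, 1)  # Beta(1, 1) is the uniform distribution
assert 0.0 <= draw <= 1.0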
Code Example #24
File: test_ucb.py Project: pjackson31/mabwiser
    def test_ucb_t10(self):

        # Dates to test
        a = datetime.datetime(2018, 1, 1)
        b = datetime.datetime(2017, 7, 31)
        c = datetime.datetime(2018, 9, 15)

        arm, mab = self.predict(arms=[a, b, c],
                                decisions=[a, b, c, a, b, c, a, b, c, a, b, b],
                                rewards=[7, 12, 1, -10, 5, 1, 2, 9, 3, 3, 6, 7],
                                learning_policy=LearningPolicy.UCB1(alpha=1),
                                seed=7,
                                num_run=4,
                                is_predict=True)

        self.assertEqual(len(arm), 4)
        self.assertEqual(arm, [b, b, b, b])
Code Example #25
File: test_greedy.py Project: pjackson31/mabwiser
    def test_epsilon25_df_list(self):

        df = pd.DataFrame({
            "decisions": [1, 1, 1, 2, 2, 2, 3, 3, 3],
            "rewards": [0, 0, 0, 0, 0, 0, 1, 1, 1]
        })

        arms, mab = self.predict(
            arms=[1, 2, 3],
            decisions=df["decisions"],
            rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.25),
            seed=7,
            num_run=4,
            is_predict=True)

        self.assertEqual(arms, [2, 3, 3, 3])
Code Example #26
    def test_greedy0_d2(self):

        arms, mab = self.predict(arms=[1, 2, 3, 4],
                                 decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                 rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                 neighborhood_policy=NeighborhoodPolicy.LSHNearest(n_dimensions=2),
                                 context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                                                  [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                                                  [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                                                  [0, 2, 1, 0, 0]],
                                 contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)

        self.assertListEqual(arms, [3, 1])