def test_linUCB(self):
    """LinUCB predictions are identical across all joblib backends."""
    rng = np.random.RandomState(seed=111)
    contexts = rng.randint(0, 5, (10, 5))
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]

    # The same fit/predict must yield the same arms whether prediction runs
    # with the default, loky, or threading backend.
    for backend in (None, 'loky', 'threading'):
        arm, mab = self.predict(arms=[1, 2, 3, 4, 5],
                                decisions=[1, 1, 4, 2, 2, 2, 3, 3, 3, 1],
                                rewards=[0, 0, 1, 0, 0, 0, 1, 1, 1, 1],
                                learning_policy=LearningPolicy.LinUCB(alpha=0.1),
                                context_history=history,
                                contexts=contexts,
                                seed=123456,
                                num_run=1,
                                is_predict=True,
                                n_jobs=2,
                                backend=backend)
        self.assertEqual(arm, [4, 4, 3, 3, 4, 4, 4, 3, 4, 3])
def test_partial_vs_batch_fit(self):
    """One batch fit and an equivalent fit + partial_fit yield identical models."""
    # Batch fit: all six observations at once.
    context_batch = np.array([[1, 0, 0, 0, 1], [0, 1, 2, 3, 4],
                              [2, 0, 1, 0, 2], [2, 1, 2, 1, 2],
                              [3, 3, 3, 2, 1], [1, 1, 1, 1, 1]])
    rewards_batch = np.array([0, 1, 1, 0, 1, 0])
    decisions_batch = np.array([1, 1, 1, 0, 0, 1])
    arms_batch, mab_batch = self.predict(
        arms=[0, 1],
        decisions=decisions_batch,
        rewards=rewards_batch,
        learning_policy=LearningPolicy.LinUCB(alpha=1),
        context_history=context_batch,
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=123456,
        num_run=1,
        is_predict=True)

    # Incremental: fit the first half, then partial_fit the second half.
    first_contexts = np.array([[1, 0, 0, 0, 1], [0, 1, 2, 3, 4], [2, 0, 1, 0, 2]])
    first_rewards = np.array([0, 1, 1])
    first_decisions = np.array([1, 1, 1])
    arms_partial, mab_partial = self.predict(
        arms=[0, 1],
        decisions=first_decisions,
        rewards=first_rewards,
        learning_policy=LearningPolicy.LinUCB(alpha=1),
        context_history=first_contexts,
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=123456,
        num_run=1,
        is_predict=True)

    second_contexts = np.array([[2, 1, 2, 1, 2], [3, 3, 3, 2, 1], [1, 1, 1, 1, 1]])
    second_rewards = np.array([0, 1, 0])
    second_decisions = np.array([0, 0, 1])
    mab_partial.partial_fit(second_decisions, second_rewards, second_contexts)

    # The internal ridge-regression state must agree arm by arm.
    for arm in (0, 1):
        batch_model = mab_batch._imp.arm_to_model[arm]
        partial_model = mab_partial._imp.arm_to_model[arm]
        self.assertListEqual(batch_model.beta.tolist(), partial_model.beta.tolist())
        self.assertListEqual(batch_model.Xty.tolist(), partial_model.Xty.tolist())
        self.assertListEqual(batch_model.A_inv.tolist(), partial_model.A_inv.tolist())
def test_scaler(self):
    """Expectations computed with per-arm scalers match manually pre-scaled runs.

    Fits one StandardScaler per arm on that arm's context rows, runs LinUCB
    with ``arm_to_scaler``, then re-runs LinUCB per arm on explicitly scaled
    data and checks the expectations agree.
    """
    arms = [1, 2, 3]
    context_history = np.array(
        [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
         [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
         [0, 2, 1, 0, 0]], dtype='float64')
    contexts = np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]])
    decisions = np.array([1, 1, 1, 2, 2, 3, 3, 3, 3, 3])
    rewards = np.array([0, 0, 1, 0, 0, 0, 0, 1, 1, 1])

    # Fit one scaler per arm on only that arm's historical contexts.
    arm_to_scaler = {}
    for arm in arms:
        scaler = StandardScaler()
        df = context_history[decisions == arm]
        scaler.fit(np.asarray(df, dtype='float64'))
        arm_to_scaler[arm] = deepcopy(scaler)

    exp, mab = self.predict(
        arms=arms,
        decisions=decisions,
        rewards=rewards,
        learning_policy=LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler),
        context_history=context_history,
        contexts=contexts,
        seed=123456,
        num_run=1,
        is_predict=False)

    # Reference run: scale each arm's data by hand and train on that arm only.
    for arm in arms:
        context_history_arm = context_history[decisions == arm]
        context_history_scaled = arm_to_scaler[arm].transform(
            np.asarray(context_history_arm, dtype='float64'))
        contexts_scaled = arm_to_scaler[arm].transform(
            np.asarray(contexts, dtype='float64'))
        exp_check, mab = self.predict(
            arms=arms,
            decisions=decisions[decisions == arm],
            rewards=rewards[decisions == arm],
            learning_policy=LearningPolicy.LinUCB(),
            context_history=context_history_scaled,
            contexts=contexts_scaled,
            seed=123456,
            num_run=1,
            is_predict=False)
        # Both paths must produce the same expectation for this arm.
        for i in range(len(contexts)):
            self.assertEqual(exp[i][arm], exp_check[i][arm])
def test_linucb_t5(self):
    """LinUCB with string arms: all four runs pick 'two' for both contexts."""
    arm, mab = self.predict(
        arms=['one', 'two', 'three'],
        decisions=['one', 'one', 'one', 'three', 'two', 'two',
                   'three', 'one', 'three', 'two'],
        rewards=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
        learning_policy=LearningPolicy.LinUCB(alpha=1),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=23,
        num_run=4,
        is_predict=True)
    # num_run=4 returns one prediction list per run; LinUCB is deterministic.
    self.assertEqual(len(arm), 4)
    self.assertEqual(
        arm, [['two', 'two'], ['two', 'two'], ['two', 'two'], ['two', 'two']])
def test_add_arm_scaler(self):
    """add_arm with a fitted scaler succeeds and registers the new arm.

    The original test contained no assertions and only verified that
    ``add_arm`` did not raise; the assertions below also pin the
    observable effect (same internals as ``test_add_arm`` checks).
    """
    scaler = StandardScaler()
    scaler.fit(np.array([[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]]).astype('float64'))
    arm_to_scaler = {0: deepcopy(scaler), 1: deepcopy(scaler)}
    mab = MAB([0, 1], LearningPolicy.LinUCB(arm_to_scaler=arm_to_scaler))

    mab.add_arm(2, scaler=deepcopy(scaler))

    # The new arm must be visible publicly and have an internal model entry.
    self.assertTrue(2 in mab.arms)
    self.assertTrue(2 in mab._imp.arm_to_model.keys())
def test_add_arm(self):
    """An arm added after fitting starts with an all-zero coefficient vector."""
    history = np.array([[1, 0, 2, 1, 1], [3, 1, 2, 3, 4], [2, -1, 1, 0, 2]])
    observed_rewards = np.array([3, 3, 1])
    observed_decisions = np.array([1, 1, 1])
    arms, mab = self.predict(arms=[0, 1],
                             decisions=observed_decisions,
                             rewards=observed_rewards,
                             learning_policy=LearningPolicy.LinUCB(alpha=1),
                             context_history=history,
                             contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                             seed=123456,
                             num_run=1,
                             is_predict=True)
    self.assertEqual(mab._imp.num_features, 5)
    self.assertEqual(arms, [0, 0])

    mab.add_arm(2)
    self.assertTrue(2 in mab._imp.arm_to_model.keys())
    # The fresh arm has never been trained, so every beta entry is zero.
    for feature_index in range(5):
        self.assertEqual(mab._imp.arm_to_model[2].beta[feature_index], 0)
def test_unused_arm_scaled(self):
    """Expectations on scaled contexts when arm 4 never appears in training.

    The expected values are pinned regression values for this fixture.
    """
    context_history = np.array(
        [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
         [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
         [0, 2, 1, 0, 0]], dtype='float64')
    scaler = StandardScaler()
    scaled_contexts = scaler.fit_transform(context_history)
    scaled_predict = scaler.transform(
        np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]], dtype='float64'))

    # Arm 4 is in the arm list but never occurs in decisions.
    exp, mab = self.predict(arms=[1, 2, 3, 4],
                            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                            rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
                            learning_policy=LearningPolicy.LinUCB(alpha=1),
                            context_history=scaled_contexts,
                            contexts=scaled_predict,
                            seed=123456,
                            num_run=1,
                            is_predict=False)

    self.assertListAlmostEqual(exp[0].values(), [
        0.702838715092242, 0.8039804426513, 0.8016765077826691,
        1.7398913429630314
    ])
    self.assertListAlmostEqual(exp[1].values(), [
        0.814935740273692, 1.09321065622604, 0.6199330260793201,
        1.8228899573337314
    ])
def test_df_list(self):
    """Decisions given as a pandas Series mixed with list rewards still work."""
    df = pd.DataFrame({
        'decisions': [1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
        'rewards': [0, 0, 1, 0, 0, 0, 0, 1, 1, 1]
    })
    # Note: decisions come from the DataFrame column, rewards as a plain list.
    arm, mab = self.predict(arms=[1, 2, 3],
                            decisions=df['decisions'],
                            rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
                            learning_policy=LearningPolicy.LinUCB(alpha=1),
                            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                                             [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                                             [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                                             [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
                            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                            seed=123456,
                            num_run=3,
                            is_predict=True)
    self.assertEqual(len(arm), 3)
    self.assertEqual(arm, [[3, 3], [3, 3], [3, 3]])
def test_unused_arm_scaled2(self):
    """Predictions on scaled contexts select the never-trained arm 4.

    With alpha=1 the exploration bonus of the untrained arm dominates.
    """
    context_history = np.array(
        [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
         [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
         [0, 2, 1, 0, 0]], dtype='float64')
    scaler = StandardScaler()
    scaled_contexts = scaler.fit_transform(context_history)
    scaled_predict = scaler.transform(
        np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]], dtype='float64'))

    arms, mab = self.predict(
        arms=[1, 2, 3, 4],
        decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
        rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
        learning_policy=LearningPolicy.LinUCB(alpha=1),
        context_history=scaled_contexts,
        contexts=scaled_predict,
        seed=123456,
        num_run=1,
        is_predict=True)
    self.assertEqual(arms, [4, 4])
def test_invalid_ridge_l2_lambda_type(self):
    """l2_lambda=None must raise TypeError for both LinUCB and LinTS."""
    for policy_type in (LearningPolicy.LinUCB, LearningPolicy.LinTS):
        with self.assertRaises(TypeError):
            self.predict(arms=[1, 2, 3],
                         decisions=[1, 1, 1],
                         rewards=[0, 0, 0],
                         learning_policy=policy_type(alpha=1, l2_lambda=None),
                         neighborhood_policy=NeighborhoodPolicy.KNearest(2),
                         context_history=np.array([1, 1, 1]),
                         contexts=np.array([[1, 1]]),
                         seed=123456,
                         num_run=1,
                         is_predict=True)
def test_invalid_add_arm_scaler(self):
    """add_arm rejects this scaler with TypeError.

    NOTE(review): the scaler here is never fitted, unlike the accepted one
    in test_add_arm_scaler — presumably that is what makes it invalid.
    """
    unfitted = StandardScaler()
    mab = MAB([0, 1],
              LearningPolicy.LinUCB(arm_to_scaler={0: deepcopy(unfitted),
                                                   1: deepcopy(unfitted)}))
    with self.assertRaises(TypeError):
        mab.add_arm(2, scaler=deepcopy(unfitted))
def test_fit_twice_new_features(self):
    """Re-fitting with a different context width resets num_features.

    First fit uses 5 features and checks pinned beta coefficients for arm 1
    (l2_lambda=0 — plain least squares); second fit uses 6 features and the
    model must adopt the new dimensionality.
    """
    context = np.array([[1, 0, 2, 1, 1], [3, 1, 2, 3, 4], [2, -1, 1, 0, 2],
                        [-1, 4, 2, 0, 1], [2, 2, 2, 2, 2], [3, 2, 1, 2, 3],
                        [0, 0, 0, 0, 0], [2, 1, 1, 1, 2], [3, 2, 3, 2, 3],
                        [8, 2, 3, 1, 0], [1, 2, -9, -7, 1], [0, 1, 1, 1, 1]])
    rewards = np.array([3, 3, 1, 0, -1, 2, 1, 2, 1, 1, 0, 3])
    decisions = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

    arms, mab = self.predict(arms=[0, 1],
                             decisions=decisions,
                             rewards=rewards,
                             learning_policy=LearningPolicy.LinUCB(
                                 alpha=1, l2_lambda=0),
                             context_history=context,
                             contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                             seed=123456,
                             num_run=1,
                             is_predict=True)
    self.assertEqual(mab._imp.num_features, 5)
    self.assertEqual(arms, [1, 1])

    # Pinned regression coefficients for arm 1 after the first fit.
    self.assertTrue(
        math.isclose(mab._imp.arm_to_model[1].beta[0], 0.09224215,
                     abs_tol=0.00000001))
    self.assertTrue(
        math.isclose(mab._imp.arm_to_model[1].beta[1], -0.20569848,
                     abs_tol=0.00000001))
    self.assertTrue(
        math.isclose(mab._imp.arm_to_model[1].beta[2], 0.13434242,
                     abs_tol=0.00000001))
    self.assertTrue(
        math.isclose(mab._imp.arm_to_model[1].beta[3], -0.1000045,
                     abs_tol=0.00000001))
    self.assertTrue(
        math.isclose(mab._imp.arm_to_model[1].beta[4], 0.63726682,
                     abs_tol=0.00000001))

    # Second fit with 6-dimensional contexts; fit() replaces prior state.
    context2 = np.array([[1, 0, 2, 1, 1, 3], [3, 1, 2, 3, 4, 1],
                         [2, -1, 1, 0, 2, 2], [-1, 4, 2, 0, 1, 0],
                         [1, 2, 3, 4, 5, 1]])
    rewards2 = np.array([-1, 2, 1, 2, 0])
    decisions2 = np.array([1, 1, 1, 1, 1])

    mab.fit(decisions2, rewards2, context2)
    self.assertEqual(mab._imp.num_features, 6)
def test_invalid_lp_arg(self):
    """Passing an unknown keyword to a learning policy raises TypeError."""
    invalid_constructions = (
        lambda: LearningPolicy.UCB1(epsilon=2),
        lambda: LearningPolicy.EpsilonGreedy(alpha=2),
        lambda: LearningPolicy.ThompsonSampling(alpha=2),
        lambda: LearningPolicy.Softmax(alpha=2),
        lambda: LearningPolicy.LinUCB(tau=1),
    )
    for make_policy in invalid_constructions:
        with self.assertRaises(TypeError):
            MAB(['a', 'b'], make_policy())
def test_linucb(self): train_df = pd.DataFrame({ 'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5], 'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10], 'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38], 'click_rate': [ 0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17, 0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83 ], 'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0] }) # Test data to for new prediction test_df = pd.DataFrame({ 'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1] }) test_df_revenue = pd.Series([7, 13]) # Scale the data scaler = StandardScaler() train = scaler.fit_transform( np.asarray(train_df[['age', 'click_rate', 'subscriber']], dtype='float64')) test = scaler.transform(np.asarray(test_df, dtype='float64')) arms, mab = self.predict( arms=[1, 2, 3, 4, 5], decisions=train_df['ad'], rewards=train_df['revenues'], learning_policy=LearningPolicy.LinUCB(alpha=1.25), context_history=train, contexts=test, seed=123456, num_run=1, is_predict=True) self.assertEqual(arms, [5, 2]) mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test) mab.add_arm(6) self.assertTrue(6 in mab.arms) self.assertTrue(6 in mab._imp.arm_to_expectation.keys())
def test_unused_arm2(self):
    """The never-trained arm 4 wins under alpha=1 exploration (unscaled data)."""
    arms, mab = self.predict(
        arms=[1, 2, 3, 4],
        decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
        rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
        learning_policy=LearningPolicy.LinUCB(alpha=1),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=123456,
        num_run=1,
        is_predict=True)
    self.assertEqual(arms, [4, 4])
def test_partial_fit(self):
    """partial_fit updates trained arms' betas and leaves untouched arms at zero.

    Coefficient values are pinned regression values for this fixture.
    """
    arm, mab = self.predict(arms=[1, 2, 3, 4],
                            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                            rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
                            learning_policy=LearningPolicy.LinUCB(alpha=1),
                            context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                                             [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                                             [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                                             [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                                             [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
                            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                            seed=123456,
                            num_run=1,
                            is_predict=True)
    self.assertEqual(arm, [4, 4])

    # Betas after the initial fit.
    b_1 = mab._imp.arm_to_model[1].beta
    self.assertTrue(math.isclose(-0.0825688, b_1[0], abs_tol=0.00001))
    b_3 = mab._imp.arm_to_model[3].beta
    self.assertTrue(math.isclose(0.023696, b_3[0], abs_tol=0.00001))
    self.assertTrue(4 in mab._imp.arm_to_model.keys())

    # Fit again
    decisions2 = [1, 3, 4]
    rewards2 = [0, 1, 1]
    context_history2 = [[0, 1, 1, 1, 1], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0]]
    mab.partial_fit(decisions2, rewards2, context_history2)

    # Betas for arms 1 and 3 shift after the incremental update.
    b_1 = mab._imp.arm_to_model[1].beta
    self.assertTrue(math.isclose(-0.05142857, b_1[0], abs_tol=0.00001))
    b_3 = mab._imp.arm_to_model[3].beta
    self.assertTrue(math.isclose(b_3[0], 0.22099152, abs_tol=0.00001))
    # Arm 4 got one observation with context [0, 0, 1, 0, 0]; its first
    # coefficient stays exactly zero.
    b_4 = mab._imp.arm_to_model[4].beta
    self.assertEqual(b_4[0], 0)
def test_linucb_t2(self):
    """LinUCB with alpha=1.5 deterministically picks arm 2 across four runs."""
    arm, mab = self.predict(
        arms=[1, 2, 3],
        decisions=[1, 1, 1, 3, 2, 2, 3, 1, 3, 1],
        rewards=[0, 1, 1, 0, 1, 0, 1, 1, 1, 1],
        learning_policy=LearningPolicy.LinUCB(alpha=1.5),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=71,
        num_run=4,
        is_predict=True)
    self.assertEqual(len(arm), 4)
    self.assertEqual(arm, [[2, 2], [2, 2], [2, 2], [2, 2]])
def test_np(self):
    """Decisions and rewards given as numpy arrays behave like lists."""
    arm, mab = self.predict(
        arms=[1, 2, 3],
        decisions=np.asarray([1, 1, 1, 2, 2, 3, 3, 3, 3, 3]),
        rewards=np.asarray([0, 0, 1, 0, 0, 0, 0, 1, 1, 1]),
        learning_policy=LearningPolicy.LinUCB(alpha=1),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=123456,
        num_run=3,
        is_predict=True)
    self.assertEqual(len(arm), 3)
    self.assertEqual(arm, [[3, 3], [3, 3], [3, 3]])
def test_linucb_t8(self):
    """LinUCB with negative/float rewards and string arms always picks 'c'."""
    arm, mab = self.predict(
        arms=['a', 'b', 'c'],
        decisions=['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a'],
        rewards=[-1.25, 0.7, 12, 10, 12, 9.2, -1, -10, 4, 0],
        learning_policy=LearningPolicy.LinUCB(alpha=0.5),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=9,
        num_run=4,
        is_predict=True)
    self.assertEqual(len(arm), 4)
    self.assertEqual(arm, [['c', 'c'], ['c', 'c'], ['c', 'c'], ['c', 'c']])
def test_alpha0_nearest5(self):
    """LinUCB alpha=0 (pure exploitation) combined with KNearest(k=5)."""
    arm, mab = self.predict(
        arms=[1, 2, 3],
        decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 1],
        rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
        learning_policy=LearningPolicy.LinUCB(alpha=0),
        neighborhood_policy=NeighborhoodPolicy.KNearest(k=5),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=123456,
        num_run=3,
        is_predict=True)
    self.assertEqual(len(arm), 3)
    self.assertEqual(arm, [[3, 3], [3, 3], [3, 3]])
def test_alpha0_expectations(self):
    """With alpha=0 expectations are the pure ridge estimates (no UCB bonus).

    Expected values are pinned regression values for this fixture; arm 2's
    expectation is exactly 0.0 because all of its rewards are zero.
    """
    exps, mab = self.predict(
        arms=[1, 2, 3],
        decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 1],
        rewards=[0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
        learning_policy=LearningPolicy.LinUCB(alpha=0),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=123456,
        num_run=1,
        is_predict=False)
    self.assertListAlmostEqual(
        exps[0].values(), [-0.018378378378378413, 0.0, 0.9966292134831471])
    self.assertListAlmostEqual(
        exps[1].values(), [0.14054054054054055, 0.0, 0.43258426966292074])
def test_linucb_t4(self):
    """LinUCB with alpha=2 over 16 observations always selects arm 4."""
    arm, mab = self.predict(
        arms=[1, 2, 4],
        decisions=[1, 1, 4, 4, 2, 2, 1, 1, 4, 2, 1, 4, 1, 2, 4, 1],
        rewards=[7, 9, 10, 20, 2, 5, 8, 15, 17, 11, 0, 5, 2, 9, 3, 1],
        learning_policy=LearningPolicy.LinUCB(alpha=2),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0],
                         [0, 2, 2, 3, 5], [1, 3, 1, 1, 1]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=23,
        num_run=4,
        is_predict=True)
    self.assertEqual(len(arm), 4)
    self.assertEqual(arm, [[4, 4], [4, 4], [4, 4], [4, 4]])
def test_linucb_knearest(self): train_df = pd.DataFrame({ 'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5], 'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10], 'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38], 'click_rate': [ 0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17, 0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83 ], 'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0] }) # Test data to for new prediction test_df = pd.DataFrame({ 'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1] }) # Scale the data scaler = StandardScaler() train = scaler.fit_transform( np.asarray(train_df[['age', 'click_rate', 'subscriber']], dtype='float64')) test = scaler.transform(np.asarray(test_df, dtype='float64')) arms, mab = self.predict( arms=[1, 2, 3, 4, 5], decisions=train_df['ad'], rewards=train_df['revenues'], learning_policy=LearningPolicy.LinUCB(alpha=1.25), neighborhood_policy=NeighborhoodPolicy.KNearest(k=4), context_history=train, contexts=test, seed=123456, num_run=1, is_predict=True) self.assertEqual(arms, [1, 2])
def test_alpha1_expectations(self):
    """Expectations with alpha=1 include the UCB exploration bonus.

    Values are pinned regression values for this fixture.
    """
    exps, mab = self.predict(
        arms=[1, 2, 3],
        decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
        rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
        learning_policy=LearningPolicy.LinUCB(alpha=1),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=123456,
        num_run=1,
        is_predict=False)
    self.assertListAlmostEqual(
        exps[0].values(),
        [0.9790312458954391, 1.1617706239438832, 1.4247056229871702])
    self.assertListAlmostEqual(
        exps[1].values(),
        [0.8896475809353053, 0.923364043088837, 1.457085577251709])
def test_l2_low(self):
    """A small ridge penalty (l2_lambda=0.1) leaves coefficients near least squares."""
    history = np.array([[1, 1, 0, 0, 1], [0, 1, 2, 9, 4], [2, 3, 1, 0, 2]])
    observed_rewards = np.array([3, 2, 1])
    observed_decisions = np.array([1, 1, 1])
    arms, mab = self.predict(arms=[0, 1],
                             decisions=observed_decisions,
                             rewards=observed_rewards,
                             learning_policy=LearningPolicy.LinUCB(
                                 alpha=1, l2_lambda=0.1),
                             context_history=history,
                             contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                             seed=123456,
                             num_run=1,
                             is_predict=True)
    self.assertEqual(mab._imp.num_features, 5)
    self.assertEqual(arms, [1, 1])

    # Pinned regression coefficients for the trained arm.
    expected_beta = [1.59499705, -0.91856183, -2.49775977, 0.14219195,
                     1.65819347]
    trained = mab._imp.arm_to_model[1].beta
    for index, expected in enumerate(expected_beta):
        self.assertTrue(
            math.isclose(trained[index], expected, abs_tol=0.00000001))
def test_l2_high(self):
    """A large ridge penalty (l2_lambda=10) shrinks coefficients toward zero."""
    history = np.array([[1, 1, 0, 0, 1], [0, 1, 2, 9, 4], [2, 3, 1, 0, 2]])
    observed_rewards = np.array([3, 2, 1])
    observed_decisions = np.array([1, 1, 1])
    arms, mab = self.predict(arms=[0, 1],
                             decisions=observed_decisions,
                             rewards=observed_rewards,
                             learning_policy=LearningPolicy.LinUCB(
                                 alpha=1, l2_lambda=10),
                             context_history=history,
                             contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                             seed=123456,
                             num_run=1,
                             is_predict=True)
    self.assertEqual(mab._imp.num_features, 5)
    self.assertEqual(arms, [0, 0])

    # Pinned regression coefficients for the trained arm.
    expected_beta = [0.18310155, 0.16372811, -0.00889076, 0.09434416,
                     0.22503229]
    trained = mab._imp.arm_to_model[1].beta
    for index, expected in enumerate(expected_beta):
        self.assertTrue(
            math.isclose(trained[index], expected, abs_tol=0.00000001))
def test_linucb_t9(self):
    """Datetime objects work as arms and decisions; all runs select c."""
    # Dates to test
    a = datetime.datetime(2018, 1, 1)
    b = datetime.datetime(2017, 7, 31)
    c = datetime.datetime(2018, 9, 15)

    arm, mab = self.predict(
        arms=[a, b, c],
        decisions=[a, b, c, a, b, c, a, b, c, a],
        rewards=[1.25, 0.7, 12, 10, 1.43, 0.2, -1, -10, 4, 0],
        learning_policy=LearningPolicy.LinUCB(alpha=0.25),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=123456,
        num_run=4,
        is_predict=True)
    self.assertEqual(len(arm), 4)
    self.assertEqual(arm, [[c, c], [c, c], [c, c], [c, c]])
def test_linucb_t10(self):
    """Datetime arms with 13 observations; all runs select b."""
    # Dates to test
    a = datetime.datetime(2018, 1, 1)
    b = datetime.datetime(2017, 7, 31)
    c = datetime.datetime(2018, 9, 15)

    arm, mab = self.predict(
        arms=[a, b, c],
        decisions=[a, b, c, a, b, c, a, b, c, a, b, b, a],
        rewards=[7, 12, 1, -10, 5, 1, 2, 9, 3, 3, 6, 7, 1],
        learning_policy=LearningPolicy.LinUCB(alpha=1),
        context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                         [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                         [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                         [1, 2, 1, 1, 3], [0, 2, 1, 0, 0],
                         [0, 1, 2, 3, 5], [1, 1, 1, 1, 1],
                         [0, 0, 1, 0, 0]],
        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
        seed=7,
        num_run=4,
        is_predict=True)
    self.assertEqual(len(arm), 4)
    self.assertEqual(arm, [[b, b], [b, b], [b, b], [b, b]])
def test_linUCB_expectations(self):
    """LinUCB expectations are identical for serial and parallel prediction."""
    rng = np.random.RandomState(seed=111)
    contexts = rng.randint(0, 5, (8, 5))

    # Pinned per-context expectations for arms 1, 2, 3.
    expected_pred = [
        [1.1923304881612438, 0.386812974778054, 2.036795075137375],
        [1.1383448695075555, 0.16604895162348998, 0.7454336659862624],
        [0.39044990078495967, 0.32572728761335573, 1.0533787080477959],
        [-0.9557496857893883, 0.4393900133310143, 1.4663248923093817],
        [-0.4630963822269796, 0.44282983853389307, 1.4430098512988918],
        [0.26667599463140623, 0.34807480426506293, 1.008245109800643],
        [1.3255310649960248, 0.43761043197507354, 0.9787023941693738],
        [0.33267910305673676, 0.29690114350965546, 1.460951676645638]
    ]
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]

    # The same expectations must come back serially, with two jobs, and
    # with all available cores.
    for n_jobs in (1, 2, -1):
        exps, mab = self.predict(
            arms=[1, 2, 3],
            decisions=[1, 1, 1, 2, 2, 2, 3, 3, 3, 1],
            rewards=[0, 0, 1, 0, 0, 0, 1, 1, 1, 1],
            learning_policy=LearningPolicy.LinUCB(alpha=0.1),
            context_history=history,
            contexts=contexts,
            seed=123456,
            num_run=1,
            is_predict=False,
            n_jobs=n_jobs)
        for row, expected in zip(exps, expected_pred):
            self.assertListAlmostEqual(row.values(), expected)
class BaseTest(unittest.TestCase):
    """Base class for MAB tests: shared policy fixtures and a fit/predict helper."""

    # A list of valid learning policies
    lps = [LearningPolicy.EpsilonGreedy(),
           LearningPolicy.EpsilonGreedy(epsilon=0),
           LearningPolicy.EpsilonGreedy(epsilon=0.0),
           LearningPolicy.EpsilonGreedy(epsilon=0.5),
           LearningPolicy.EpsilonGreedy(epsilon=1),
           LearningPolicy.EpsilonGreedy(epsilon=1.0),
           LearningPolicy.Random(),
           LearningPolicy.Softmax(),
           LearningPolicy.Softmax(tau=0.1),
           LearningPolicy.Softmax(tau=0.5),
           LearningPolicy.Softmax(tau=1),
           LearningPolicy.Softmax(tau=1.0),
           LearningPolicy.Softmax(tau=5.0),
           LearningPolicy.ThompsonSampling(),
           LearningPolicy.UCB1(),
           LearningPolicy.UCB1(alpha=0),
           LearningPolicy.UCB1(alpha=0.0),
           LearningPolicy.UCB1(alpha=0.5),
           LearningPolicy.UCB1(alpha=1),
           LearningPolicy.UCB1(alpha=1.0),
           LearningPolicy.UCB1(alpha=5)]

    # Parametric (contextual regression) learning policies
    para_lps = [LearningPolicy.LinTS(alpha=0.00001, l2_lambda=1),
                LearningPolicy.LinTS(alpha=0.5, l2_lambda=1),
                LearningPolicy.LinTS(alpha=1, l2_lambda=1),
                LearningPolicy.LinTS(alpha=0.00001, l2_lambda=0.5),
                LearningPolicy.LinTS(alpha=0.5, l2_lambda=0.5),
                LearningPolicy.LinTS(alpha=1, l2_lambda=0.5),
                LearningPolicy.LinUCB(alpha=0, l2_lambda=1),
                LearningPolicy.LinUCB(alpha=0.5, l2_lambda=1),
                LearningPolicy.LinUCB(alpha=1, l2_lambda=1),
                LearningPolicy.LinUCB(alpha=0, l2_lambda=0.5),
                LearningPolicy.LinUCB(alpha=0.5, l2_lambda=0.5),
                LearningPolicy.LinUCB(alpha=1, l2_lambda=0.5)]

    # A list of valid context policies
    nps = [NeighborhoodPolicy.KNearest(),
           NeighborhoodPolicy.KNearest(k=1),
           NeighborhoodPolicy.KNearest(k=3),
           NeighborhoodPolicy.Radius(),
           NeighborhoodPolicy.Radius(2.5),
           NeighborhoodPolicy.Radius(5)]

    # Clustering-based neighborhood policies
    cps = [NeighborhoodPolicy.Clusters(),
           NeighborhoodPolicy.Clusters(n_clusters=3),
           NeighborhoodPolicy.Clusters(is_minibatch=True),
           NeighborhoodPolicy.Clusters(n_clusters=3, is_minibatch=True)]

    @staticmethod
    def predict(arms: List[Arm],
                decisions: Union[List, np.ndarray, pd.Series],
                rewards: Union[List, np.ndarray, pd.Series],
                learning_policy: Union[LearningPolicy.EpsilonGreedy,
                                       LearningPolicy.Random,
                                       LearningPolicy.Softmax,
                                       LearningPolicy.ThompsonSampling,
                                       LearningPolicy.UCB1,
                                       LearningPolicy.LinTS,
                                       LearningPolicy.LinUCB],
                neighborhood_policy: Union[None,
                                           NeighborhoodPolicy.Clusters,
                                           NeighborhoodPolicy.Radius,
                                           NeighborhoodPolicy.KNearest] = None,
                context_history: Union[None, List[Num], List[List[Num]],
                                       np.ndarray, pd.DataFrame,
                                       pd.Series] = None,
                contexts: Union[None, List[Num], List[List[Num]],
                                np.ndarray, pd.DataFrame, pd.Series] = None,
                seed: Optional[int] = 123456,
                num_run: Optional[int] = 1,
                is_predict: Optional[bool] = True,
                n_jobs: Optional[int] = 1,
                backend: Optional[str] = None
                ) -> (Union[Arm, List[Arm], List[float], List[List[float]]], MAB):
        """Sets up a MAB model and runs the given configuration.

        Return list of predictions or prediction and the mab instance,
        when is_predict is true
        Return list of expectations or expectation and the mab instance,
        when is predict is false

        Calls the predict or predict_expectation method num_run number
        of times.
        """

        # Model
        mab = MAB(arms, learning_policy, neighborhood_policy, seed, n_jobs,
                  backend)

        # Train
        mab.fit(decisions, rewards, context_history)

        # Test
        if is_predict:

            # Return: prediction(s) and the MAB instance
            predictions = [mab.predict(contexts) for _ in range(num_run)]
            return predictions[0] if num_run == 1 else predictions, mab
        else:

            # Return: expectations(s) and the MAB instance
            expectations = [
                mab.predict_expectations(contexts) for _ in range(num_run)
            ]
            return expectations[0] if num_run == 1 else expectations, mab

    def assertListAlmostEqual(self, list1, list2):
        """
        Asserts that floating values in the given lists (almost) equals to each other
        """
        # Accept any iterable (e.g. dict_values) by materializing to a list.
        if not isinstance(list1, list):
            list1 = list(list1)

        if not isinstance(list2, list):
            list2 = list(list2)

        self.assertEqual(len(list1), len(list2))

        # Element-wise unittest almost-equality (default 7 decimal places).
        for index, val in enumerate(list1):
            self.assertAlmostEqual(val, list2[index])