def test_reset(self):
    """Check that reset() returns a bool for a player that already has history."""
    player = 'player'
    service = Bandit.run(Config())

    # Give the player one selection and one reward before resetting.
    service.register_arm(1)
    service.select_arm(player)
    service.register_reward(player, 1, 10)

    self.assertIsInstance(service.reset(player), bool)
def test_get_arm_info(self):
    """Check that get_arm_info() returns a dict of str keys to ArmInfo values."""
    from jubatus.bandit.types import ArmInfo

    service = Bandit.run(Config())
    service.register_arm(1)
    service.select_arm('player')

    arm_table = service.get_arm_info('player')
    self.assertIsInstance(arm_table, dict)

    # Every entry must be keyed by a string and carry an ArmInfo record.
    for arm_name, arm_info in arm_table.items():
        self.assertIsInstance(arm_name, str)
        self.assertIsInstance(arm_info, ArmInfo)
# NOTE(review): whitespace-collapsed fragment. The leading `return 0.0` is the
# tail of a definition that starts outside this view; the rest is a script that
# builds three Slot arms, launches a Bandit configured with method
# 'epsilon_greedy' (epsilon 0.1), resets `player`, registers every key of
# `slots` as an arm, and then for `iteration` rounds selects an arm for
# `player`, draws a reward from the matching Slot, reports it back and adds it
# to `cumulative_reward`. With everything on one line, the text after the first
# `#` is parsed as a comment, so the original line breaks must be restored
# before this can run -- TODO confirm the layout against the upstream file.
return 0.0 # Experimental config. # Which slot machine should we choose? iteration = 1000 slots = { 'bad': Slot(0.1, 50, 10), # E[R] = 5: bad arm 'normal': Slot(0.01, 600, 100), # E[R] = 6: normal arm 'good': Slot(0.001, 8000, 1000) # E[R] = 8: good arm } # Launch bandit service. player = 'Jubatan' config = Config(method='epsilon_greedy', parameter={'epsilon': 0.1}) bandit = Bandit.run(config) # Initialize bandit settings. bandit.reset(player) for name, slot in slots.items(): bandit.register_arm(name) # Select arms and get rewards. cumulative_reward = 0 for i in range(iteration): arm = bandit.select_arm(player) reward = float(slots[arm].reward()) bandit.register_reward(player, arm, reward) cumulative_reward += reward # Show result.
def test_select_arm(self):
    """Check that select_arm() returns the only registered arm as a string."""
    arm_id = 1
    service = Bandit.run(Config())
    service.register_arm(arm_id)

    chosen = service.select_arm('player')

    # The arm was registered as an int; the expected value is its str() form.
    self.assertEqual(chosen, str(arm_id))
def test_delete_arm(self):
    """Check that delete_arm() returns a bool for a previously registered arm."""
    arm_id = 1
    service = Bandit.run(Config())
    service.register_arm(arm_id)

    self.assertIsInstance(service.delete_arm(arm_id), bool)
def test_embedded(self):
    """Smoke test: Bandit.run() with embedded=True must complete without raising."""
    default_config = Config()
    Bandit.run(default_config, embedded=True)
def test_simple_launch(self):
    """Smoke test: Bandit.run() with a default Config must complete without raising."""
    default_config = Config()
    Bandit.run(default_config)