Esempio n. 1
0
 def test_method_params(self):
     # Every method reported by Config.methods() must carry the shared
     # 'assume_unrewarded' key in its 'parameter' section.
     for method in Config.methods():
         params = Config(method=method)['parameter']
         # assertIn reports the container contents on failure, unlike
         # assertTrue(x in y), which only says "False is not true".
         self.assertIn('assume_unrewarded', params)

     # Each of these methods must additionally expose its own parameter.
     expected_keys = (
         ('epsilon_greedy', 'epsilon'),
         ('softmax', 'tau'),
         ('exp3', 'gamma'),
     )
     for method, key in expected_keys:
         self.assertIn(key, Config(method=method)['parameter'])
Esempio n. 2
0
 def test_method_params(self):
   # The 'assume_unrewarded' key must be present in the 'parameter'
   # section for every method returned by Config.methods().
   for method in Config.methods():
     # assertIn gives a descriptive failure message (shows the container),
     # which assertTrue(x in y) does not.
     self.assertIn('assume_unrewarded', Config(method=method)['parameter'])
   # Method-specific keys.
   self.assertIn('epsilon', Config(method='epsilon_greedy')['parameter'])
   self.assertIn('tau', Config(method='softmax')['parameter'])
   self.assertIn('gamma', Config(method='exp3')['parameter'])
Esempio n. 3
0
 def test_reset(self):
     # Run one register / select / reward cycle for a player, then verify
     # that reset() for that player returns a boolean.
     player = 'player'
     service = Bandit.run(Config())
     service.register_arm(1)
     service.select_arm(player)
     service.register_reward(player, 1, 10)
     self.assertIsInstance(service.reset(player), bool)
Esempio n. 4
0
 def test_get_arm_info(self):
     # get_arm_info() must return a dict whose keys are str and whose
     # values are ArmInfo instances.
     from jubatus.bandit.types import ArmInfo
     service = Bandit.run(Config())
     service.register_arm(1)
     service.select_arm('player')
     arm_infos = service.get_arm_info('player')
     self.assertIsInstance(arm_infos, dict)
     for arm_name in arm_infos:
         self.assertIsInstance(arm_name, str)
         self.assertIsInstance(arm_infos[arm_name], ArmInfo)
Esempio n. 5
0
        else:
            return 0.0


# Experimental config.
# Which slot machine should we choose?
iteration = 1000  # number of select/reward rounds played below
slots = {
    'bad': Slot(0.1, 50, 10),  # E[R] = 5: bad arm
    'normal': Slot(0.01, 600, 100),  # E[R] = 6: normal arm
    'good': Slot(0.001, 8000, 1000),  # E[R] = 8: good arm
}

# Launch bandit service.
player = 'Jubatan'
config = Config(method='epsilon_greedy', parameter={'epsilon': 0.1})
bandit = Bandit.run(config)

# Initialize bandit settings: clear the player's state, then register one
# arm per slot machine (only the names are needed here).
bandit.reset(player)
for name in slots:
    bandit.register_arm(name)

# Select arms and get rewards.
# Start from a float so the total has the same type as the rewards added
# to it, even when no round is played.
cumulative_reward = 0.0
for _ in range(iteration):
    arm = bandit.select_arm(player)
    reward = float(slots[arm].reward())
    # Feed the observed reward back so later selections can use it.
    bandit.register_reward(player, arm, reward)
    cumulative_reward += reward
Esempio n. 6
0
 def test_default(self):
   # Config.default() must select the 'epsilon_greedy' method.
   self.assertEqual('epsilon_greedy', Config.default()['method'])
Esempio n. 7
0
 def test_methods(self):
   # methods(), called on an instance, must return a list.
   available = Config().methods()
   self.assertIsInstance(available, list)
Esempio n. 8
0
 def test_default(self):
     # The configuration built by Config.default() uses 'epsilon_greedy'.
     method_name = Config.default()['method']
     self.assertEqual('epsilon_greedy', method_name)
Esempio n. 9
0
 def test_methods(self):
     # The methods() result obtained through an instance must be a list.
     self.assertIsInstance(Config().methods(), list)
Esempio n. 10
0
 def test_simple(self):
     # A Config built with no arguments selects 'epsilon_greedy'.
     self.assertEqual('epsilon_greedy', Config()['method'])
Esempio n. 11
0
 def test_select_arm(self):
     # After registering arm 1, select_arm() must return that arm's
     # string form.
     service = Bandit.run(Config())
     service.register_arm(1)
     chosen = service.select_arm('player')
     self.assertEqual(chosen, str(1))
Esempio n. 12
0
 def test_delete_arm(self):
     # Deleting a previously registered arm must return a boolean.
     service = Bandit.run(Config())
     service.register_arm(1)
     self.assertIsInstance(service.delete_arm(1), bool)
Esempio n. 13
0
 def test_embedded(self):
     # Smoke test: passes as long as run() with embedded=True does not raise.
     default_config = Config()
     Bandit.run(default_config, embedded=True)
Esempio n. 14
0
 def test_simple_launch(self):
     # Smoke test: passes as long as run() with a default Config does not
     # raise.
     default_config = Config()
     Bandit.run(default_config)