import unittest

from scripts.markovDecisionProcess import MarkovDecisionProcess
from scripts.percept import Percept


class TestMdp(unittest.TestCase):
    def setUp(self):
        self.mdp = MarkovDecisionProcess()
        self.mdp.reset()
        # Percept(state, action, next_state, reward, done); field order inferred from the tests below
        self.mdp.update(Percept(0, 2, 1, 1, False))
        self.mdp.update(Percept(0, 2, 4, 2, False))

    def test_rewards(self):
        self.assertEqual(self.mdp.rewards[0, 2, 1], 1)

    def test_nsa(self):
        self.assertEqual(self.mdp.nsa[0, 2], 2)

    def test_ntsa(self):
        self.assertEqual(self.mdp.ntsa[0, 2, 1], 1)

    def test_ptsa(self):
        self.assertEqual(self.mdp.ptsa[0, 2, 1], 0.5)
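

# The assertions above pin down a counting model: nsa[s, a] counts visits to
# (s, a), ntsa[s, a, s'] counts observed transitions, and ptsa is their ratio.
# Below is a minimal sketch consistent with these tests; it is not the
# project's actual implementation, and the Percept field names are assumed.
import numpy as np


class CountingMdpSketch:
    """Maximum-likelihood model of an MDP estimated from observed percepts."""

    def __init__(self, n_states=16, n_actions=4):
        self.nsa = np.zeros((n_states, n_actions))             # visits of (s, a)
        self.ntsa = np.zeros((n_states, n_actions, n_states))  # counts of (s, a, s')
        self.ptsa = np.zeros((n_states, n_actions, n_states))  # estimated P(s' | s, a)
        self.rewards = np.zeros((n_states, n_actions, n_states))

    def update(self, percept):
        s, a, s2, r = percept.state, percept.action, percept.next_state, percept.reward
        self.nsa[s, a] += 1
        self.ntsa[s, a, s2] += 1
        self.ptsa[s, a] = self.ntsa[s, a] / self.nsa[s, a]     # relative frequency
        self.rewards[s, a, s2] = r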


class Main:
    """
    Solely for testing purposes. Do not use.
    """

    mdp = MarkovDecisionProcess(0, range(16), range(4))
    strategy = Qlearning(mdp, 1, 0.1, 1.0, 0.01)
    algorithm = Agent(strategy, 'FrozenLake-v0')
    algorithm.learn(1000)
    """
    Use the code below to visualise the evolution of epsilon.
    When it is not needed, comment it out, along with the related code
    in learningstrategy.py.
    """
    """


# Kivy imports required by this snippet (standard Kivy API):
from kivy.uix.gridlayout import GridLayout
from kivy.properties import ObjectProperty


class FlGame(GridLayout):
    """
    Grid-layout visualisation of the Frozen Lake game.
    The learning algorithm is initialised within this class, so make
    adjustments here.
    """
    n_episodes = 50000
    mdp = MarkovDecisionProcess(0, range(16), range(4))
    # strategy = Qlearning(mdp, 0.1, 0.001, 0.9, 1.0, 0.01)
    # strategy = NstepQlearning(mdp, 0.5, 0.001, 0.6, 4, 1.0, 0.01)
    # strategy = MonteCarlo(mdp, 0.1, 0.001, 0.9, 1.0, 0.01)
    strategy = ValueIteration(mdp, 0.8, 0.001, 0.90, 0.9, 1.0, 0.01)
    algorithm = Agent(strategy, 'FrozenLake-v0')
    # One widget reference per tile of the 4x4 grid, typically bound in the kv file.
    state0 = ObjectProperty(None)
    state1 = ObjectProperty(None)
    state2 = ObjectProperty(None)
    state3 = ObjectProperty(None)
    state4 = ObjectProperty(None)
    state5 = ObjectProperty(None)
    state6 = ObjectProperty(None)
    state7 = ObjectProperty(None)
    state8 = ObjectProperty(None)
    state9 = ObjectProperty(None)
    state10 = ObjectProperty(None)
    state11 = ObjectProperty(None)
    state12 = ObjectProperty(None)
    state13 = ObjectProperty(None)
    state14 = ObjectProperty(None)
    state15 = ObjectProperty(None)

    def update(self, dt):
        if self.algorithm.strategy.episode_count < self.n_episodes:
            self.algorithm.learn(1)
        if self.algorithm.strategy.episode_count % 100 == 0:
            print(self.algorithm.strategy.episode_count)
        actions = self.algorithm.visualisationList
        # States 5, 7, 11, 12 (holes) and 15 (goal) are terminal tiles of the
        # 4x4 FrozenLake map, so no action is shown for them.
        self.state0.text = actions[0]
        self.state1.text = actions[1]
        self.state2.text = actions[2]
        self.state3.text = actions[3]
        self.state4.text = actions[4]
        self.state6.text = actions[6]
        self.state8.text = actions[8]
        self.state9.text = actions[9]
        self.state10.text = actions[10]
        self.state13.text = actions[13]
        self.state14.text = actions[14]
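
# A minimal sketch of how FlGame would typically be driven: a Kivy App that
# schedules update() on the Clock. The App class name and the 30 Hz rate are
# assumptions, and a kv file defining the state0..state15 widgets is required.
from kivy.app import App
from kivy.clock import Clock


class FlApp(App):
    def build(self):
        game = FlGame()
        Clock.schedule_interval(game.update, 1.0 / 30.0)  # Clock passes dt to update()
        return game


if __name__ == '__main__':
    FlApp().run()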


# Fixture from the ValueIteration test suite; the enclosing TestCase class is
# not shown in the source, so the name below is assumed.
class TestValueIteration(unittest.TestCase):
    def setUp(self):
        self.mdp = MarkovDecisionProcess()
        self.value = ValueIteration(self.mdp, 0.8, 0.001, 0.90, 0.9, 1.0, 0.01)
        self.value.mdp.reset()
        self.value.evaluate(Percept(0, 2, 1, 1, False))
        self.value.evaluate(Percept(0, 2, 4, 2, False))


# Standalone profiling script for ValueIteration.evaluate().
import cProfile
from scripts.markovDecisionProcess import MarkovDecisionProcess
from scripts.percept import Percept
from scripts.learning_strategies.valueIteration import ValueIteration

mdp = MarkovDecisionProcess(0, range(16), range(4))
valueTest = ValueIteration(mdp, 0.8, 0.001, 0.90, 0.9, 1.0, 0.01)
percept = Percept(0, 2, 4, 2, False)

cProfile.run('for i in range(1000): valueTest.evaluate(percept)')
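
# Note: cProfile.run also accepts a sort argument (standard-library API), e.g.
# cProfile.run('for i in range(1000): valueTest.evaluate(percept)', sort='cumulative')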


# Fixture from the NstepQlearning test suite (class name assumed, as above).
class TestNstepQlearning(unittest.TestCase):
    def setUp(self):
        self.mdp = MarkovDecisionProcess()
        self.nstep = NstepQlearning(self.mdp, 0.8, 0.001, 0.7, 5, 1.0, 0.01)
        self.nstep.mdp.reset()
        self.nstep.evaluate(Percept(0, 2, 1, 1, False))
        self.nstep.evaluate(Percept(0, 2, 4, 2, False))


# Fixture from the MonteCarlo test suite (class name assumed).
class TestMonteCarlo(unittest.TestCase):
    def setUp(self):
        self.mdp = MarkovDecisionProcess()
        self.monte = MonteCarlo(self.mdp, 0.8, 0.001, 0.6, 1.0, 0.01)
        self.monte.mdp.reset()
        self.monte.evaluate(Percept(0, 2, 1, 1, False))
        self.monte.evaluate(Percept(0, 2, 4, 2, False))


# Fixture from the Qlearning test suite (class name assumed).
class TestQlearning(unittest.TestCase):
    def setUp(self):
        self.mdp = MarkovDecisionProcess()
        self.qlearning = Qlearning(self.mdp, 0.8, 0.01, 0.01, 1.0, 0.01)
        self.qlearning.mdp.reset()
        self.qlearning.evaluate(Percept(0, 2, 1, 1, False))
        self.qlearning.evaluate(Percept(0, 2, 4, 2, False))
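    # A sketch of a companion test, assuming evaluate() forwards each percept
    # to mdp.update() (the identical fixtures elsewhere suggest this, but it
    # is an assumption about the implementation):
    def test_counts_after_evaluate(self):
        self.assertEqual(self.qlearning.mdp.nsa[0, 2], 2)
        self.assertEqual(self.qlearning.mdp.ptsa[0, 2, 1], 0.5)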