예제 #1
0
    def _value_iteration_slow(self):
        """Animate value iteration one sweep at a time and report the outcome.

        Each pass calls ``value_iteration`` with ``num_iter=1``, extracts a
        policy from the result, pushes both to the grid window and to
        ``self.mdp``, and then pauses for 0.25 seconds. The loop ends as soon
        as ``values_converged(new, previous)`` returns a truthy value, or
        after 100 sweeps, whichever comes first. A dialog then states whether
        convergence was actually reached.
        """
        max_steps = 100
        old_values = dict(self.mdp.values)
        for step in range(1, max_steps + 1):
            values = value_iteration(self.mdp.values, self.mdp, num_iter=1)
            policy = policy_extraction(values, self.mdp)
            self.gridworldwindow.update_grid(values, policy)
            self.mdp.update_values(values)
            self.mdp.update_policies(policy)

            # The window is refreshed on both sides of the pause; presumably
            # the first call paints the new frame and the second handles
            # events that queued up while sleeping -- TODO confirm.
            self.gridworldwindow.window.update()
            time.sleep(0.25)
            self.gridworldwindow.window.update()

            # Snapshot the values so the next sweep is compared against a
            # copy rather than against an object that may be updated later.
            new_values = dict(values)
            if values_converged(new_values, old_values):
                message = 'Value Iteration has converged in {} steps!'.format(step)
                break

            old_values = new_values
        else:
            # The loop ran out of sweeps without ever breaking: do not claim
            # convergence in that case.
            message = 'Value Iteration did not converge within {} steps.'.format(max_steps)
        self.gridworldwindow.show_dialog(message)
예제 #2
0
 def _value_iteration_100_steps(self):
     """Run 100 value-iteration sweeps in one go and publish the result.

     The values returned by ``value_iteration`` and the policy extracted
     from them are handed to the grid display and then stored on
     ``self.mdp``.
     """
     # NOTE(review): sibling step methods in this file use
     # `self.gridworldwindow` and `update_policies`; confirm that
     # `self.gridworld` / `update_policy` are the intended names here.
     mdp = self.mdp
     updated_values = value_iteration(mdp.values, mdp, num_iter=100)
     greedy_policy = policy_extraction(updated_values, mdp)

     # Display first, then persist, in the same order as before.
     self.gridworld.update_grid(updated_values, greedy_policy)
     mdp.update_values(updated_values)
     mdp.update_policy(greedy_policy)
예제 #3
0
 def _value_iteration_1_step(self):
     """Advance value iteration by a single sweep and publish the result.

     Calls ``value_iteration`` with ``num_iter=1`` on the current MDP
     values, extracts a policy from the returned values, redraws the grid
     window with both, and stores both on ``self.mdp``.
     """
     model = self.mdp
     stepped_values = value_iteration(model.values, model, num_iter=1)
     stepped_policy = policy_extraction(stepped_values, model)

     # Redraw before writing back to the model, matching the original order.
     self.gridworldwindow.update_grid(stepped_values, stepped_policy)
     model.update_values(stepped_values)
     model.update_policies(stepped_policy)